[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*.vsix\n*$py.class\n*.ipynb\n*.zip\n# C extensions\n*.so\n*.npz\n*.npy\n# Distribution / packaging\n.Python\n*.mp4\n*.pth\n*.jpg\n*.jpeg\n*.png\n*.log\n*.json\n*.csv\nckpts\nwork_dirs\nnuscenes-mini\nnuscenes-mini/\nwork_dirs/\nwork_dirs_/\ndata/\ntests/\ntest/\ntest2/\nval/\nckpts/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nwandb/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\nbarrier\nbicycle\nbus\ncar\nconstruction_vehicle\ndriveable_surface\nmanmade\nmotorcycle\nother_flat\nothers\npedestrian\nper\nsidewalk\nterrain\ntraffic_cone\ntrailer\ntruck\nvegetation\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n# cython generated cpp\ndata\n.vscode\n.idea\n\n# custom\n*.pkl\n*.pkl.json\n*.log.json\nwork_dirs/\nexps/\n*~\nmmdet3d/.mim\n\n# Pytorch\n*.pth\n\n# demo\n*.jpg\n*.png\ndata/s3dis/Stanford3dDataset_v1.2_Aligned_Version/\ndata/scannet/scans/\ndata/sunrgbd/OFFICIAL_SUNRGBD/\n*.obj\n*.ply\n*.pdf\n\n# Waymo evaluation\nmmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main\n"
  },
  {
    "path": "README.md",
    "content": "# Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?\n\n### [arXiv](http://arxiv.org/abs/2312.03031) | [知乎](https://zhuanlan.zhihu.com/p/669454065)\n\nhttps://github.com/NVlabs/BEV-Planner/assets/27915819/93afa127-813f-4d36-b4f2-84f6b8d9b905\n\n## INTRODUCTION\nEnd-to-end autonomous driving recently emerged as a promising research direction to target autonomy from a full-stack perspective. Along this line, many of the latest works follow an open-loop evaluation setting on nuScenes to study the planning behavior. In this paper, we delve deeper into the problem by conducting thorough analyses and demystifying more devils in the details. We initially observed that the nuScenes dataset, characterized by relatively simple driving scenarios, leads to an under-utilization of perception information in end-to-end models incorporating ego status, such as the ego vehicle's velocity. These models tend to rely predominantly on the ego vehicle's status for future path planning. \nBeyond the limitations of the dataset, we also note that current metrics do not comprehensively assess the planning quality, leading to potentially biased conclusions drawn from existing benchmarks. To address this issue, we introduce a new metric to evaluate whether the predicted trajectories adhere to the road. \nWe further propose a simple baseline able to achieve competitive results without relying on perception annotations.\nGiven the current limitations on the benchmark and metrics, we suggest the community reassess relevant prevailing research and be cautious whether the continued pursuit of state-of-the-art would yield convincing and universal conclusions.\n\n\n## Start\n### 1.Setting up Environment\n### 2.Preparing Dataset\n### 3.Training\n\n### 4.Eval"
  },
  {
    "path": "configs/_base_/datasets/coco_instance.py",
    "content": "dataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "configs/_base_/datasets/kitti-3d-3class.py",
    "content": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\ninput_modality = dict(use_lidar=True, use_camera=False)\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/kitti/':\n#         's3://openmmlab/datasets/detection3d/kitti/',\n#         'data/kitti/':\n#         's3://openmmlab/datasets/detection3d/kitti/'\n#     }))\n\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    file_client_args=file_client_args)\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            file_client_args=file_client_args)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR',\n        file_client_args=file_client_args),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR',\n        file_client_args=file_client_args))\n\nevaluation = dict(interval=1, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/kitti-3d-car.py",
    "content": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15))\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=1, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/kitti-mono3d.py",
    "content": "dataset_type = 'KittiMonoDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\ninput_modality = dict(use_lidar=False, use_camera=True)\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox=True,\n        with_label=True,\n        with_attr_label=False,\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_bbox_depth=True),\n    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',\n            'centers2d', 'depths'\n        ]),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1242, 375),\n        flip=False,\n        transforms=[\n            dict(type='RandomFlip3D'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['img']),\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. client)\neval_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['img'])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_train_mono3d.coco.json',\n        info_file=data_root + 'kitti_infos_train.pkl',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=train_pipeline,\n        modality=input_modality,\n        test_mode=False,\n        box_type_3d='Camera'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',\n        info_file=data_root + 'kitti_infos_val.pkl',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='Camera'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',\n        info_file=data_root + 'kitti_infos_val.pkl',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='Camera'))\nevaluation = dict(interval=2)\n"
  },
  {
    "path": "configs/_base_/datasets/lyft-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-80, -80, -5, 80, 80, 3]\n# For Lyft we usually do 9-class detection\nclass_names = [\n    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',\n    'bicycle', 'pedestrian', 'animal'\n]\ndataset_type = 'LyftDataset'\ndata_root = 'data/lyft/'\n# Input modality for Lyft dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/lyft/': 's3://lyft/lyft/',\n#         'data/lyft/': 's3://lyft/lyft/'\n#    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_test.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True))\n# For Lyft dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/nuim_instance.py",
    "content": "dataset_type = 'CocoDataset'\ndata_root = 'data/nuimages/'\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-train.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "configs/_base_/datasets/nus-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-50, -50, -5, 50, 50, 3]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\n# Input modality for nuScenes dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/nuscenes/': 's3://nuscenes/nuscenes/',\n#         'data/nuscenes/': 's3://nuscenes/nuscenes/'\n#     }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    test_dataloader=dict(runner_type='EpochBasedRunner'),\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/nus-mono3d.py",
    "content": "dataset_type = 'NuScenesMonoDataset'\ndata_root = 'data/nuscenes/'\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\n# Input modality for nuScenes dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox=True,\n        with_label=True,\n        with_attr_label=True,\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_bbox_depth=True),\n    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',\n            'gt_labels_3d', 'centers2d', 'depths'\n        ]),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=False,\n        transforms=[\n            dict(type='RandomFlip3D'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['img']),\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. client)\neval_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['img'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=train_pipeline,\n        modality=input_modality,\n        test_mode=False,\n        box_type_3d='Camera'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='Camera'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='Camera'))\nevaluation = dict(interval=2)\n"
  },
  {
    "path": "configs/_base_/datasets/range100_lyft-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-100, -100, -5, 100, 100, 3]\n# For Lyft we usually do 9-class detection\nclass_names = [\n    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',\n    'bicycle', 'pedestrian', 'animal'\n]\ndataset_type = 'LyftDataset'\ndata_root = 'data/lyft/'\n# Input modality for Lyft dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/lyft/': 's3://lyft/lyft/',\n#         'data/lyft/': 's3://lyft/lyft/'\n#    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_test.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True))\n# For Lyft dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/s3dis-3d-5class.py",
    "content": "# dataset settings\ndataset_type = 'S3DISDataset'\ndata_root = './data/s3dis/'\nclass_names = ('table', 'chair', 'sofa', 'bookcase', 'board')\ntrain_area = [1, 2, 3, 4, 6]\ntest_area = 5\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='PointSample', num_points=40000),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        # following ScanNet dataset the rotation range is 5 degrees\n        rot_range=[-0.087266, 0.087266],\n        scale_ratio_range=[1.0, 1.0],\n        shift_height=True),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n                flip_ratio_bev_vertical=0.5),\n            dict(type='PointSample', num_points=40000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type='ConcatDataset',\n            datasets=[\n                dict(\n                    type=dataset_type,\n                    data_root=data_root,\n                    ann_file=data_root + f's3dis_infos_Area_{i}.pkl',\n                    pipeline=train_pipeline,\n                    filter_empty_gt=False,\n                    classes=class_names,\n                    box_type_3d='Depth') for i in train_area\n            ],\n            separate_eval=False)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/s3dis_seg-3d-13class.py",
    "content": "# dataset settings\ndataset_type = 'S3DISSegDataset'\ndata_root = './data/s3dis/'\nclass_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',\n               'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/s3dis/':\n#         's3://openmmlab/datasets/detection3d/s3dis_processed/',\n#         'data/s3dis/':\n#         's3://openmmlab/datasets/detection3d/s3dis_processed/'\n#     }))\n\nnum_points = 4096\ntrain_area = [1, 2, 3, 4, 6]\ntest_area = 5\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        file_client_args=file_client_args,\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='LoadAnnotations3D',\n        file_client_args=file_client_args,\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=tuple(range(len(class_names))),\n        max_cat_id=13),\n    dict(\n        type='IndoorPatchPointSample',\n        num_points=num_points,\n        block_size=1.0,\n        ignore_index=len(class_names),\n        use_normalized_coord=True,\n        enlarge_size=0.2,\n        min_unique_num=None),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        file_client_args=file_client_args,\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(\n        # a wrapper in order to successfully call test function\n        # actually we don't perform test-time-aug\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.0,\n                flip_ratio_bev_vertical=0.0),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\n# we need to load gt seg_mask!\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        file_client_args=file_client_args,\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='LoadAnnotations3D',\n        file_client_args=file_client_args,\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=tuple(range(len(class_names))),\n        max_cat_id=13),\n    dict(\n        type='DefaultFormatBundle3D',\n        with_label=False,\n        class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    # train on area 1, 2, 3, 4, 6\n    # test on area 5\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_files=[\n            data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area\n        ],\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        ignore_index=len(class_names),\n        scene_idxs=[\n            data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'\n            for i in train_area\n        ],\n        file_client_args=file_client_args),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names),\n        scene_idxs=data_root +\n        f'seg_info/Area_{test_area}_resampled_scene_idxs.npy',\n        file_client_args=file_client_args),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names),\n        file_client_args=file_client_args))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/scannet-3d-18class.py",
    "content": "# dataset settings\ndataset_type = 'ScanNetDataset'\ndata_root = './data/scannet/'\nclass_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n               'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n               'garbagebin')\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/scannet/':\n#         's3://openmmlab/datasets/detection3d/scannet_processed/',\n#         'data/scannet/':\n#         's3://openmmlab/datasets/detection3d/scannet_processed/'\n#     }))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        file_client_args=file_client_args,\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='LoadAnnotations3D',\n        file_client_args=file_client_args,\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_mask_3d=True,\n        with_seg_3d=True),\n    dict(type='GlobalAlignment', rotation_axis=2),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,\n                       36, 39),\n        max_cat_id=40),\n    dict(type='PointSample', num_points=40000),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.087266, 0.087266],\n        scale_ratio_range=[1.0, 1.0],\n        shift_height=True),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',\n            'pts_instance_mask'\n        ])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        file_client_args=file_client_args,\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='GlobalAlignment', rotation_axis=2),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n                flip_ratio_bev_vertical=0.5),\n            dict(type='PointSample', num_points=40000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        file_client_args=file_client_args,\n        coord_type='DEPTH',\n        shift_height=False,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='GlobalAlignment', rotation_axis=2),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'scannet_infos_train.pkl',\n            pipeline=train_pipeline,\n            filter_empty_gt=False,\n            classes=class_names,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='Depth',\n            file_client_args=file_client_args)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth',\n        file_client_args=file_client_args),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth',\n        file_client_args=file_client_args))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/scannet_seg-3d-20class.py",
    "content": "# dataset settings\ndataset_type = 'ScanNetSegDataset'\ndata_root = './data/scannet/'\nclass_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',\n               'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',\n               'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',\n               'bathtub', 'otherfurniture')\nnum_points = 8192\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/scannet/':\n#         's3://openmmlab/datasets/detection3d/scannet_processed/',\n#         'data/scannet/':\n#         's3://openmmlab/datasets/detection3d/scannet_processed/'\n#     }))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5],\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True,\n        file_client_args=file_client_args),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,\n                       33, 34, 36, 39),\n        max_cat_id=40),\n    dict(\n        type='IndoorPatchPointSample',\n        num_points=num_points,\n        block_size=1.5,\n        ignore_index=len(class_names),\n        use_normalized_coord=False,\n        enlarge_size=0.2,\n        min_unique_num=None),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5],\n        file_client_args=file_client_args),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(\n        # a wrapper in order to successfully call test function\n        # actually we don't perform test-time-aug\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.0,\n                flip_ratio_bev_vertical=0.0),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\n# we need to load gt seg_mask!\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5],\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True,\n        file_client_args=file_client_args),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,\n                       33, 34, 36, 39),\n        max_cat_id=40),\n    dict(\n        type='DefaultFormatBundle3D',\n        with_label=False,\n        class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        ignore_index=len(class_names),\n        scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy',\n        file_client_args=file_client_args),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names),\n        file_client_args=file_client_args),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names),\n        file_client_args=file_client_args))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/sunrgbd-3d-10class.py",
    "content": "dataset_type = 'SUNRGBDDataset'\ndata_root = 'data/sunrgbd/'\nclass_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n               'night_stand', 'bookshelf', 'bathtub')\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/sunrgbd/':\n#         's3://openmmlab/datasets/detection3d/sunrgbd_processed/',\n#         'data/sunrgbd/':\n#         's3://openmmlab/datasets/detection3d/sunrgbd_processed/'\n#     }))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2],\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', file_client_args=file_client_args),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n    ),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.523599, 0.523599],\n        scale_ratio_range=[0.85, 1.15],\n        shift_height=True),\n    dict(type='PointSample', num_points=20000),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2],\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n            ),\n            dict(type='PointSample', num_points=20000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        load_dim=6,\n        use_dim=[0, 1, 2],\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=16,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'sunrgbd_infos_train.pkl',\n            pipeline=train_pipeline,\n            classes=class_names,\n            filter_empty_gt=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='Depth',\n            file_client_args=file_client_args)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'sunrgbd_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth',\n        file_client_args=file_client_args),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'sunrgbd_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth',\n        file_client_args=file_client_args))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/waymoD5-3d-3class.py",
    "content": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for efficient experiments\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))\n\nclass_names = ['Car', 'Pedestrian', 'Cyclist']\npoint_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/datasets/waymoD5-3d-car.py",
    "content": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for efficient experiments\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))\n\nclass_names = ['Car']\npoint_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "configs/_base_/default_runtime.py",
    "content": "checkpoint_config = dict(interval=1)\n# yapf:disable push\n# By default we use textlogger hook and tensorboard\n# For more loggers see\n# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n\n# disable opencv multithreading to avoid system being overloaded\nopencv_num_threads = 0\n# set multi-process start method as `fork` to speed up the training\nmp_start_method = 'fork'\n"
  },
  {
    "path": "configs/_base_/init.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-40, -40, -1.0, 40, 40, 5.4]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (256, 704),\n    'src_size': (900, 1600),\n\n    # Augmentation\n    'resize': (-0.06, 0.11),\n    'rot': (-5.4, 5.4),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nuse_checkpoint = True\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-40, 40, 0.8],\n    'y': [-40, 40, 0.8],\n    'z': [-1, 5.4, 0.8],\n    'depth': [2.0, 42.0, 0.5],\n}\ndepth_categories = 80 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\nuse_custom_eval_hook=True\n\n\nbda_aug_conf = dict(\n    rot_lim=(-22.5, 22.5),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.5,\n    flip_dy_ratio=0.5)\n\n\nnum_Z_anchors = 8\nvoxel_size = [0.1, 0.1, 0.1]\n\nbev_h_ = 100\nbev_w_ = 100\n_dim_ = 256\n_pos_dim_ = _dim_//2\n_ffn_dim_ = _dim_ * 2\n_num_levels_= 1\nnumC_Trans=80\n\n\nempty_idx = 0  # noise 0-->255\nnum_cls = 18  # 0 free, 1-16 obj\nvisible_mask = False\nimg_norm_cfg = None\n\ncascade_ratio = 4\nsample_from_voxel = False\nsample_from_img = False\nocc_size = [200, 200, 16]\nvoxel_out_indices = (0, 1, 2)\nvoxel_out_channel = 256\nvoxel_channels = [64, 64*2, 64*4]\n\n\nmodel = dict(\n    type='NewBEV',\n    use_depth_supervision=True,\n    img_backbone=dict(\n        # pretrained='ckpts/resnet50-0676ba61.pth',\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=False,\n        with_cp=use_checkpoint,\n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet',\n        in_channels=_dim_,\n        context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=1.,\n        use_dcn=False,\n    ),\n\n    img_view_transformer=dict(\n        type='LSSViewTransformerFunction3D',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        # in_channels=256,\n        # out_channels=numC_Trans,\n        downsample=16),\n    frpn=None,\n    bevformer_encoder=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet3D',\n        depth=18,\n        with_cp=use_checkpoint,\n        block_strides=[1, 2, 2],\n        n_input_channels=numC_Trans,\n        block_inplanes=voxel_channels,\n        out_indices=voxel_out_indices,\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n    ),\n    img_bev_encoder_neck=dict(\n        type='FPN3D',\n        with_cp=use_checkpoint,\n        in_channels=voxel_channels,\n        out_channels=voxel_out_channel,\n   
     norm_cfg=dict(type='SyncBN', requires_grad=True),\n    ),\n    occupancy_head= dict(\n        type='OccHead',\n        with_cp=use_checkpoint,\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        soft_weights=True,\n        cascade_ratio=cascade_ratio,\n        sample_from_voxel=sample_from_voxel,\n        sample_from_img=sample_from_img,\n        final_occ_size=occ_size,\n        fine_topk=15000,\n        empty_idx=empty_idx,\n        num_level=len(voxel_out_indices),\n        in_channels=[voxel_out_channel] * len(voxel_out_indices),\n        out_channel=num_cls,\n        point_cloud_range=point_cloud_range,\n        loss_weight_cfg=dict(\n            loss_voxel_ce_weight=1.0,\n            loss_voxel_sem_scal_weight=1.0,\n            loss_voxel_geo_scal_weight=1.0,\n            loss_voxel_lovasz_weight=1.0,\n        ),\n    ),\n    pts_bbox_head=None)\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\ndense_lidar_prefix = '/mount/data/nuscenes/'\n\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        dense_lidar_prefix=dense_lidar_prefix,\n        file_client_args=file_client_args),\n   dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    #\n    dict(type='LoadBEVMask', point_cloud_range=point_cloud_range, bev_size=(bev_h_, bev_w_)),\n    dict(type='LoadOccupancy', ignore_nonvisible=True, occupancy_path=occupancy_path),\n    \n    # dict(type='PadMultiViewImage'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bev_mask', 'gt_occupancy', 'gt_depth'\n                               ])\n]\n\ntest_pipeline = [\n    dict(type='PrepareImageInputs', data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        classes=class_names,\n        is_train=False),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        dense_lidar_prefix=dense_lidar_prefix,\n        file_client_args=file_client_args),\n    dict(type='LoadBEVMask'),\n    dict(type='LoadOccupancy', occupancy_path=occupancy_path),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bev_mask', 'gt_occupancy', 'visible_mask'])\n        ])\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n)\n\ntest_data_config = dict(\n    
pipeline=test_pipeline,\n    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=6,\n    test_dataloader=dict(runner_type='EpochBasedRunner'),\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        use_valid_flag=True,\n        modality=input_modality,\n        img_info_prototype='bevdet',\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['val', 'test']:\n    data[key].update(share_data_config)\n# data['train']['dataset'].update(share_data_config)\n\n# Optimizer\noptimizer = dict(type='AdamW', lr=1.4e-4, weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=200,\n    warmup_ratio=0.001,\n    step=[1,])\nrunner = dict(type='EpochBasedRunner', max_epochs=1)\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='WechatLoggerHook'),\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n    ),\n    dict(\n        type='ForgeLoadWorker',\n        priority='VERY_LOW',\n    ),\n]\n# load_from = 'ckpt1s/r50_256x705_depth_pretrain.pth'\nevaluation = dict(interval=12, pipeline=test_pipeline)\nfp16 = dict(loss_scale='dynamic')\n# checkpoint_config = dict(interval=5)\n# find_unused_parameters=True\n\n# Input shape: (256, 704)\n# Flops: 192.3 GFLOPs\n# Params: 58.39 M\n# find_unused_parameters=True"
  },
  {
    "path": "configs/_base_/models/3dssd.py",
    "content": "model = dict(\n    type='SSD3DNet',\n    backbone=dict(\n        type='PointNet2SAMSG',\n        in_channels=4,\n        num_points=(4096, 512, (256, 256)),\n        radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n        num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),\n        sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),\n                     ((64, 64, 128), (64, 64, 128), (64, 96, 128)),\n                     ((128, 128, 256), (128, 192, 256), (128, 256, 256))),\n        aggregation_channels=(64, 128, 256),\n        fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (512, -1)),\n        norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    bbox_head=dict(\n        type='SSD3DHead',\n        in_channels=256,\n        vote_module_cfg=dict(\n            in_channels=256,\n            num_points=256,\n            gt_per_seed=1,\n            conv_channels=(128, ),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n            with_res_feat=False,\n            vote_xyz_range=(3.0, 3.0, 2.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModuleMSG',\n            num_point=256,\n            radii=(4.8, 6.4),\n            sample_nums=(16, 32),\n            mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),\n            norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n            use_xyz=True,\n            normalize_xyz=False,\n            bias=True),\n        pred_layer_cfg=dict(\n            in_channels=1536,\n            shared_conv_channels=(512, 128),\n            cls_conv_channels=(128, ),\n            reg_conv_channels=(128, ),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n            bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            reduction='sum',\n            loss_weight=1.0),\n        center_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        corner_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),\n    test_cfg=dict(\n        nms_cfg=dict(type='nms', iou_thr=0.1),\n        sample_mod='spec',\n        score_thr=0.0,\n        per_class_proposal=True,\n        max_output_num=100))\n"
  },
  {
    "path": "configs/_base_/models/cascade_mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='CascadeRoIHead',\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 
loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            nms_post=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                    neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            nms_post=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', 
iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py",
    "content": "voxel_size = [0.1, 0.1, 0.2]\nmodel = dict(\n    type='CenterPoint',\n    pts_voxel_layer=dict(\n        max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),\n    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[41, 1024, 1024],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,\n                                                                      128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='CenterHead',\n        in_channels=sum([256, 256]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            out_size_factor=8,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        norm_bbox=True),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[1024, 1024, 40],\n            voxel_size=voxel_size,\n            out_size_factor=8,\n            dense_reg=1,\n            gaussian_overlap=0.1,\n            max_objs=500,\n            min_radius=2,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),\n    test_cfg=dict(\n        pts=dict(\n            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_per_img=500,\n            max_pool_nms=False,\n            min_radius=[4, 12, 10, 1, 0.85, 0.175],\n            score_threshold=0.1,\n            out_size_factor=8,\n            voxel_size=voxel_size[:2],\n            nms_type='rotate',\n            pre_max_size=1000,\n            post_max_size=83,\n            nms_thr=0.2)))\n"
  },
  {
    "path": "configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py",
    "content": "voxel_size = [0.2, 0.2, 8]\nmodel = dict(\n    type='CenterPoint',\n    pts_voxel_layer=dict(\n        max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),\n    pts_voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=(0.2, 0.2, 8),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n        legacy=False),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        out_channels=[64, 128, 256],\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        out_channels=[128, 128, 128],\n        upsample_strides=[0.5, 1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='CenterHead',\n        in_channels=sum([128, 128, 128]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        norm_bbox=True),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            out_size_factor=4,\n            dense_reg=1,\n            gaussian_overlap=0.1,\n            max_objs=500,\n            min_radius=2,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),\n    test_cfg=dict(\n        pts=dict(\n            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_per_img=500,\n            max_pool_nms=False,\n            min_radius=[4, 12, 10, 1, 0.85, 0.175],\n            score_threshold=0.1,\n            pc_range=[-51.2, -51.2],\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            nms_type='rotate',\n            pre_max_size=1000,\n            post_max_size=83,\n            nms_thr=0.2)))\n"
  },
  {
    "path": "configs/_base_/models/dgcnn.py",
    "content": "# model settings\nmodel = dict(\n    type='EncoderDecoder3D',\n    backbone=dict(\n        type='DGCNNBackbone',\n        in_channels=9,  # [xyz, rgb, normal_xyz], modified with dataset\n        num_samples=(20, 20, 20),\n        knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),\n        radius=(None, None, None),\n        gf_channels=((64, 64), (64, 64), (64, )),\n        fa_channels=(1024, ),\n        act_cfg=dict(type='LeakyReLU', negative_slope=0.2)),\n    decode_head=dict(\n        type='DGCNNHead',\n        fp_channels=(1216, 512),\n        channels=256,\n        dropout_ratio=0.5,\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        act_cfg=dict(type='LeakyReLU', negative_slope=0.2),\n        loss_decode=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            class_weight=None,  # modified with dataset\n            loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(),\n    test_cfg=dict(mode='slide'))\n"
  },
  {
    "path": "configs/_base_/models/fcaf3d.py",
    "content": "model = dict(\n    type='MinkSingleStage3DDetector',\n    voxel_size=.01,\n    backbone=dict(type='MinkResNet', in_channels=3, depth=34),\n    head=dict(\n        type='FCAF3DHead',\n        in_channels=(64, 128, 256, 512),\n        out_channels=128,\n        voxel_size=.01,\n        pts_prune_threshold=100000,\n        pts_assign_threshold=27,\n        pts_center_threshold=18,\n        n_classes=18,\n        n_reg_outs=6),\n    train_cfg=dict(),\n    test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))\n"
  },
  {
    "path": "configs/_base_/models/fcos3d.py",
    "content": "model = dict(\n    type='FCOSMono3D',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='FCOSMono3DHead',\n        num_classes=10,\n        in_channels=256,\n        stacked_convs=2,\n        feat_channels=256,\n        use_direction_classifier=True,\n        diff_rad_by_sin=True,\n        pred_attrs=True,\n        pred_velo=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        strides=[8, 16, 32, 64, 128],\n        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo\n        cls_branch=(256, ),\n        reg_branch=(\n            (256, ),  # offset\n            (256, ),  # depth\n            (256, ),  # size\n            (256, ),  # rot\n            ()  # velo\n        ),\n        dir_branch=(256, ),\n        attr_branch=(256, ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_attr=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),\n        norm_on_bbox=True,\n        centerness_on_reg=True,\n        center_sampling=True,\n        conv_bias=True,\n        dcn_on_last_conv=True),\n    train_cfg=dict(\n        allowed_border=0,\n        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_thr=0.8,\n        score_thr=0.05,\n        min_bbox_size=0,\n        max_per_img=200))\n"
  },
  {
    "path": "configs/_base_/models/groupfree3d.py",
    "content": "model = dict(\n    type='GroupFree3DNet',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=3,\n        num_points=(2048, 1024, 512, 256),\n        radius=(0.2, 0.4, 0.8, 1.2),\n        num_samples=(64, 32, 16, 16),\n        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                     (128, 128, 256)),\n        fp_channels=((256, 256), (256, 288)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)),\n    bbox_head=dict(\n        type='GroupFree3DHead',\n        in_channels=288,\n        num_decoder_layers=6,\n        num_proposal=256,\n        transformerlayers=dict(\n            type='BaseTransformerLayer',\n            attn_cfgs=dict(\n                type='GroupFree3DMHA',\n                embed_dims=288,\n                num_heads=8,\n                attn_drop=0.1,\n                dropout_layer=dict(type='Dropout', drop_prob=0.1)),\n            ffn_cfgs=dict(\n                embed_dims=288,\n                feedforward_channels=2048,\n                ffn_drop=0.1,\n                act_cfg=dict(type='ReLU', inplace=True)),\n            operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',\n                             'norm')),\n        pred_layer_cfg=dict(\n            in_channels=288, shared_conv_channels=(288, 288), bias=True),\n        sampling_objectness_loss=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=8.0),\n        objectness_loss=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        center_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(sample_mod='kps'),\n    test_cfg=dict(\n        sample_mod='kps',\n        nms_thr=0.25,\n        score_thr=0.0,\n        per_class_proposal=True,\n        prediction_stages='last'))\n"
  },
  {
    "path": "configs/_base_/models/h3dnet.py",
    "content": "primitive_z_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=2,\n    num_classes=18,\n    primitive_mode='z',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nprimitive_xy_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=1,\n    num_classes=18,\n    primitive_mode='xy',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nprimitive_line_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=0,\n    num_classes=18,\n    primitive_mode='line',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        
vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=1.0,\n        loss_dst_weight=1.0),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=1.0,\n        loss_dst_weight=1.0),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nmodel = dict(\n    type='H3DNet',\n    backbone=dict(\n        type='MultiBackbone',\n        num_streams=4,\n        suffixes=['net0', 'net1', 'net2', 'net3'],\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),\n        act_cfg=dict(type='ReLU'),\n        backbones=dict(\n            type='PointNet2SASSG',\n            in_channels=4,\n            num_points=(2048, 1024, 512, 256),\n            radius=(0.2, 0.4, 0.8, 1.2),\n            num_samples=(64, 32, 16, 16),\n            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                         (128, 128, 256)),\n            fp_channels=((256, 256), (256, 256)),\n            norm_cfg=dict(type='BN2d'),\n            sa_cfg=dict(\n                type='PointSAModule',\n                pool_mod='max',\n                use_xyz=True,\n                normalize_xyz=True))),\n    rpn_head=dict(\n        type='VoteHead',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=3,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=256,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        pred_layer_cfg=dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.2, 0.8],\n            reduction='sum',\n            loss_weight=5.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l2',\n            reduction='sum',\n          
  loss_src_weight=10.0,\n            loss_dst_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    roi_head=dict(\n        type='H3DRoIHead',\n        primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],\n        bbox_head=dict(\n            type='H3DBboxHead',\n            gt_per_seed=3,\n            num_proposal=256,\n            suface_matching_cfg=dict(\n                type='PointSAModule',\n                num_point=256 * 6,\n                radius=0.5,\n                num_sample=32,\n                mlp_channels=[128 + 6, 128, 64, 32],\n                use_xyz=True,\n                normalize_xyz=True),\n            line_matching_cfg=dict(\n                type='PointSAModule',\n                num_point=256 * 12,\n                radius=0.5,\n                num_sample=32,\n                mlp_channels=[128 + 12, 128, 64, 32],\n                use_xyz=True,\n                normalize_xyz=True),\n            feat_channels=(128, 128),\n            primitive_refine_channels=[128, 128, 128],\n            upper_thresh=100.0,\n            surface_thresh=0.5,\n            line_thresh=0.5,\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='sum',\n                loss_weight=5.0),\n            center_loss=dict(\n                type='ChamferDistance',\n                mode='l2',\n                reduction='sum',\n                loss_src_weight=10.0,\n                loss_dst_weight=10.0),\n            dir_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            dir_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            size_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            size_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            semantic_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            cues_objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.3, 0.7],\n                reduction='mean',\n                loss_weight=5.0),\n            cues_semantic_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.3, 0.7],\n                reduction='mean',\n                loss_weight=5.0),\n            proposal_objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='none',\n                loss_weight=5.0),\n            primitive_center_loss=dict(\n                type='MSELoss', reduction='none', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),\n        rpn_proposal=dict(use_nms=False),\n        rcnn=dict(\n    
        pos_distance_thr=0.3,\n            neg_distance_thr=0.6,\n            sample_mod='vote',\n            far_threshold=0.6,\n            near_threshold=0.3,\n            mask_surface_threshold=0.3,\n            label_surface_threshold=0.3,\n            mask_line_threshold=0.3,\n            label_line_threshold=0.3)),\n    test_cfg=dict(\n        rpn=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True,\n            use_nms=False),\n        rcnn=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True)))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_fpn_lyft.py",
    "content": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nmodel = dict(\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-80, -80, -5, 80, 80, 3],\n        max_voxels=(60000, 60000)),\n    pts_voxel_encoder=dict(\n        feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),\n    pts_middle_encoder=dict(output_shape=[640, 640]),\n    pts_bbox_head=dict(\n        num_classes=9,\n        anchor_generator=dict(\n            ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),\n    # model training settings (based on nuScenes model settings)\n    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_fpn_nus.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.25, 0.25, 8]\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_voxel_layer=dict(\n        max_num_points=64,\n        point_cloud_range=[-50, -50, -5, 50, 50, 3],\n        voxel_size=voxel_size,\n        max_voxels=(30000, 40000)),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=4,\n        feat_channels=[64, 64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=[-50, -50, -5, 50, 50, 3],\n        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    pts_neck=dict(\n        type='FPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        act_cfg=dict(type='ReLU'),\n        in_channels=[64, 128, 256],\n        out_channels=256,\n        start_level=0,\n        num_outs=3),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=10,\n        in_channels=256,\n        feat_channels=256,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],\n            scales=[1, 2, 4],\n            sizes=[\n                [2.5981, 0.8660, 1.],  # 1.5 / sqrt(3)\n                [1.7321, 0.5774, 1.],  # 1 / sqrt(3)\n                [1., 1., 1.],\n                [0.4, 0.4, 1],\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True),\n        assigner_per_size=False,\n        diff_rad_by_sin=True,\n        dir_offset=-0.7854,  # -pi / 4\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_thr=0.2,\n            score_thr=0.05,\n            min_bbox_size=0,\n            max_num=500)))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py",
    "content": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nmodel = dict(\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-100, -100, -5, 100, 100, 3],\n        max_voxels=(60000, 60000)),\n    pts_voxel_encoder=dict(\n        feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),\n    pts_middle_encoder=dict(output_shape=[800, 800]),\n    pts_bbox_head=dict(\n        num_classes=9,\n        anchor_generator=dict(\n            ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),\n    # model training settings (based on nuScenes model settings)\n    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_secfpn_kitti.py",
    "content": "voxel_size = [0.16, 0.16, 4]\n\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=32,  # max_points_per_voxel\n        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)  # (training, testing) max_voxels\n    ),\n    voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=4,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),\n    middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),\n    backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        assign_per_class=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[\n                [0, -39.68, -0.6, 69.12, 39.68, -0.6],\n                [0, -39.68, -0.6, 69.12, 39.68, -0.6],\n                [0, -39.68, -1.78, 69.12, 39.68, -1.78],\n            ],\n            sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_secfpn_waymo.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.32, 0.32, 6]\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],\n        voxel_size=voxel_size,\n        max_voxels=(32000, 32000)),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],\n        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[3, 5, 5],\n        layer_strides=[1, 2, 2],\n        out_channels=[64, 128, 256]),\n    pts_neck=dict(\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],\n                    [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],\n                    [-74.88, -74.88, 0, 74.88, 74.88, 0]],\n            sizes=[\n                [4.73, 2.08, 1.77],  # car\n                [1.81, 0.84, 1.77],  # cyclist\n                [0.91, 0.84, 1.74]  # pedestrian\n            ],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        dir_offset=-0.7854,  # -pi / 4\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=[\n                dict(  # car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n                dict(  # pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n  
                  neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n            ],\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_pre=4096,\n            nms_thr=0.25,\n            score_thr=0.1,\n            min_bbox_size=0,\n            max_num=500)))\n"
  },
  {
    "path": "configs/_base_/models/hv_second_secfpn_kitti.py",
    "content": "voxel_size = [0.05, 0.05, 0.1]\n\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=5,\n        point_cloud_range=[0, -40, -3, 70.4, 40, 1],\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -1.78, 70.4, 40.0, -1.78],\n            ],\n            sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.35,\n                neg_iou_thr=0.2,\n                min_pos_iou=0.2,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.35,\n                neg_iou_thr=0.2,\n                min_pos_iou=0.2,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n"
  },
  {
    "path": "configs/_base_/models/hv_second_secfpn_waymo.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.08, 0.08, 0.1]\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=10,\n        point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],\n        voxel_size=voxel_size,\n        max_voxels=(80000, 90000)),\n    voxel_encoder=dict(type='HardSimpleVFE', num_features=5),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[61, 1280, 1920],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=384,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],\n                    [-76.8, -51.2, 0, 76.8, 51.2, 0],\n                    [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],\n            sizes=[\n                [4.73, 2.08, 1.77],  # car\n                [0.91, 0.84, 1.74],  # pedestrian\n                [1.81, 0.84, 1.77]  # cyclist\n            ],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        dir_offset=-0.7854,  # -pi / 4\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            dict(  # cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1)\n        ],\n        allowed_border=0,\n        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_pre=4096,\n        nms_thr=0.25,\n        score_thr=0.1,\n        
min_bbox_size=0,\n        max_num=500))\n"
  },
  {
    "path": "configs/_base_/models/imvotenet_image.py",
    "content": "model = dict(\n    type='ImVoteNet',\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    img_rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    img_roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=10,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n\n    # model training and testing settings\n    train_cfg=dict(\n        img_rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        img_rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        img_rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        img_rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n       
 img_rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "configs/_base_/models/mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=True,\n            
    ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "configs/_base_/models/paconv_cuda_ssg.py",
    "content": "_base_ = './paconv_ssg.py'\n\nmodel = dict(\n    backbone=dict(\n        sa_cfg=dict(\n            type='PAConvCUDASAModule',\n            scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))\n"
  },
  {
    "path": "configs/_base_/models/paconv_ssg.py",
    "content": "# model settings\nmodel = dict(\n    type='EncoderDecoder3D',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=9,  # [xyz, rgb, normalized_xyz]\n        num_points=(1024, 256, 64, 16),\n        radius=(None, None, None, None),  # use kNN instead of ball query\n        num_samples=(32, 32, 32, 32),\n        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,\n                                                                    512)),\n        fp_channels=(),\n        norm_cfg=dict(type='BN2d', momentum=0.1),\n        sa_cfg=dict(\n            type='PAConvSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False,\n            paconv_num_kernels=[16, 16, 16],\n            paconv_kernel_input='w_neighbor',\n            scorenet_input='w_neighbor_dist',\n            scorenet_cfg=dict(\n                mlp_channels=[16, 16, 16],\n                score_norm='softmax',\n                temp_factor=1.0,\n                last_bn=False))),\n    decode_head=dict(\n        type='PAConvHead',\n        # PAConv model's decoder takes skip connections from beckbone\n        # different from PointNet++, it also concats input features in the last\n        # level of decoder, leading to `128 + 6` as the channel number\n        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),\n                     (128 + 6, 128, 128, 128)),\n        channels=128,\n        dropout_ratio=0.5,\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        act_cfg=dict(type='ReLU'),\n        loss_decode=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            class_weight=None,  # should be modified with dataset\n            loss_weight=1.0)),\n    # correlation loss to regularize PAConv's kernel weights\n    loss_regularization=dict(\n        type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),\n    # model training and testing settings\n    train_cfg=dict(),\n    test_cfg=dict(mode='slide'))\n"
  },
  {
    "path": "configs/_base_/models/parta2.py",
    "content": "# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\n\nmodel = dict(\n    type='PartA2',\n    voxel_layer=dict(\n        max_num_points=5,  # max_points_per_voxel\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)  # (training, testing) max_voxels\n    ),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseUNet',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    rpn_head=dict(\n        type='PartA2RPNHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],\n            sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        assigner_per_size=True,\n        assign_per_class=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    roi_head=dict(\n        type='PartAggregationROIHead',\n        num_classes=3,\n        semantic_head=dict(\n            type='PointwiseSemanticHead',\n            in_channels=16,\n            extra_width=0.2,\n            seg_score_thr=0.3,\n            num_classes=3,\n            loss_seg=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0),\n            loss_part=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n        seg_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='max')),\n        part_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='avg')),\n        bbox_head=dict(\n            type='PartA2BboxHead',\n            num_classes=3,\n            seg_in_channels=16,\n            part_in_channels=4,\n            seg_conv_channels=[64, 64],\n            part_conv_channels=[64, 64],\n            merge_conv_channels=[128, 128],\n            down_conv_channels=[128, 256],\n            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n            shared_fc_channels=[256, 512, 512, 512],\n            cls_channels=[256, 256],\n            
reg_channels=[256, 256],\n            dropout_ratio=0.1,\n            roi_feat_size=14,\n            with_corner_loss=True,\n            loss_bbox=dict(\n                type='SmoothL1Loss',\n                beta=1.0 / 9.0,\n                reduction='sum',\n                loss_weight=1.0),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.45,\n                    min_pos_iou=0.45,\n                    ignore_iof_thr=-1)\n            ],\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=9000,\n            nms_post=512,\n            max_num=512,\n            nms_thr=0.8,\n            score_thr=0,\n            use_rotate_nms=False),\n        rcnn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1)\n            ],\n            sampler=dict(\n                type='IoUNegPiecewiseSampler',\n                num=128,\n                pos_fraction=0.55,\n                neg_piece_fractions=[0.8, 0.2],\n                neg_iou_piece_thrs=[0.55, 0.1],\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False,\n                return_iou=True),\n            cls_pos_thr=0.75,\n            cls_neg_thr=0.25)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1024,\n            nms_post=100,\n            max_num=100,\n            nms_thr=0.7,\n            score_thr=0,\n            use_rotate_nms=True),\n        rcnn=dict(\n  
          use_rotate_nms=True,\n            use_raw_score=True,\n            nms_thr=0.01,\n            score_thr=0.1)))\n"
  },
  {
    "path": "configs/_base_/models/pgd.py",
    "content": "_base_ = './fcos3d.py'\n# model settings\nmodel = dict(\n    bbox_head=dict(\n        _delete_=True,\n        type='PGDHead',\n        num_classes=10,\n        in_channels=256,\n        stacked_convs=2,\n        feat_channels=256,\n        use_direction_classifier=True,\n        diff_rad_by_sin=True,\n        pred_attrs=True,\n        pred_velo=True,\n        pred_bbox2d=True,\n        pred_keypoints=False,\n        dir_offset=0.7854,  # pi/4\n        strides=[8, 16, 32, 64, 128],\n        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo\n        cls_branch=(256, ),\n        reg_branch=(\n            (256, ),  # offset\n            (256, ),  # depth\n            (256, ),  # size\n            (256, ),  # rot\n            ()  # velo\n        ),\n        dir_branch=(256, ),\n        attr_branch=(256, ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_attr=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        norm_on_bbox=True,\n        centerness_on_reg=True,\n        center_sampling=True,\n        conv_bias=True,\n        dcn_on_last_conv=True,\n        use_depth_classifier=True,\n        depth_branch=(256, ),\n        depth_range=(0, 50),\n        depth_unit=10,\n        division='uniform',\n        depth_bins=6,\n        bbox_coder=dict(type='PGDBBoxCoder', code_size=9)),\n    test_cfg=dict(nms_pre=1000, nms_thr=0.8, score_thr=0.01, max_per_img=200))\n"
  },
  {
    "path": "configs/_base_/models/point_rcnn.py",
    "content": "model = dict(\n    type='PointRCNN',\n    backbone=dict(\n        type='PointNet2SAMSG',\n        in_channels=4,\n        num_points=(4096, 1024, 256, 64),\n        radii=((0.1, 0.5), (0.5, 1.0), (1.0, 2.0), (2.0, 4.0)),\n        num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),\n        sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,\n                                                                    128)),\n                     ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),\n                                                          (256, 384, 512))),\n        fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (-1), (-1)),\n        aggregation_channels=(None, None, None, None),\n        dilated_group=(False, False, False, False),\n        out_indices=(0, 1, 2, 3),\n        norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    neck=dict(\n        type='PointNetFPNeck',\n        fp_channels=((1536, 512, 512), (768, 512, 512), (608, 256, 256),\n                     (257, 128, 128))),\n    rpn_head=dict(\n        type='PointRPNHead',\n        num_classes=3,\n        enlarge_width=0.1,\n        pred_layer_cfg=dict(\n            in_channels=128,\n            cls_linear_channels=(256, 256),\n            reg_linear_channels=(256, 256)),\n        cls_loss=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            reduction='sum',\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        bbox_loss=dict(\n            type='SmoothL1Loss',\n            beta=1.0 / 9.0,\n            reduction='sum',\n            loss_weight=1.0),\n        bbox_coder=dict(\n            type='PointXYZWHLRBBoxCoder',\n            code_size=8,\n            # code_size: (center residual (3), size regression (3),\n            #             torch.cos(yaw) (1), torch.sin(yaw) (1)\n            use_mean_size=True,\n            mean_size=[[3.9, 1.6, 1.56], [0.8, 0.6, 1.73], [1.76, 0.6,\n                                                            1.73]])),\n    roi_head=dict(\n        type='PointRCNNRoIHead',\n        point_roi_extractor=dict(\n            type='Single3DRoIPointExtractor',\n            roi_layer=dict(type='RoIPointPool3d', num_sampled_points=512)),\n        bbox_head=dict(\n            type='PointRCNNBboxHead',\n            num_classes=1,\n            pred_layer_cfg=dict(\n                in_channels=512,\n                cls_conv_channels=(256, 256),\n                reg_conv_channels=(256, 256),\n                bias=True),\n            in_channels=5,\n            # 5 = 3 (xyz) + scores + depth\n            mlp_channels=[128, 128],\n            num_points=(128, 32, -1),\n            radius=(0.2, 0.4, 100),\n            num_samples=(16, 16, 16),\n            sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),\n            with_corner_loss=True),\n        depth_normalizer=70.0),\n    # model training and testing settings\n    train_cfg=dict(\n        pos_distance_thr=10.0,\n        rpn=dict(\n            nms_cfg=dict(\n                use_rotate_nms=True, iou_thr=0.8, nms_pre=9000, nms_post=512),\n            score_thr=None),\n        rcnn=dict(\n            assigner=[\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                
        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1,\n                    match_low_quality=False),\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1,\n                    match_low_quality=False),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1,\n                    match_low_quality=False)\n            ],\n            sampler=dict(\n                type='IoUNegPiecewiseSampler',\n                num=128,\n                pos_fraction=0.5,\n                neg_piece_fractions=[0.8, 0.2],\n                neg_iou_piece_thrs=[0.55, 0.1],\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False,\n                return_iou=True),\n            cls_pos_thr=0.7,\n            cls_neg_thr=0.25)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_cfg=dict(\n                use_rotate_nms=True, iou_thr=0.85, nms_pre=9000, nms_post=512),\n            score_thr=None),\n        rcnn=dict(use_rotate_nms=True, nms_thr=0.1, score_thr=0.1)))\n"
  },
  {
    "path": "configs/_base_/models/pointnet2_msg.py",
    "content": "_base_ = './pointnet2_ssg.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='PointNet2SAMSG',\n        in_channels=6,  # [xyz, rgb], should be modified with dataset\n        num_points=(1024, 256, 64, 16),\n        radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),\n        num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),\n        sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,\n                                                                    128)),\n                     ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),\n                                                          (256, 384, 512))),\n        aggregation_channels=(None, None, None, None),\n        fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (-1), (-1)),\n        dilated_group=(False, False, False, False),\n        out_indices=(0, 1, 2, 3),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    decode_head=dict(\n        fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),\n                     (128, 128, 128, 128))))\n"
  },
  {
    "path": "configs/_base_/models/pointnet2_ssg.py",
    "content": "# model settings\nmodel = dict(\n    type='EncoderDecoder3D',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=6,  # [xyz, rgb], should be modified with dataset\n        num_points=(1024, 256, 64, 16),\n        radius=(0.1, 0.2, 0.4, 0.8),\n        num_samples=(32, 32, 32, 32),\n        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,\n                                                                    512)),\n        fp_channels=(),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    decode_head=dict(\n        type='PointNet2Head',\n        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),\n                     (128, 128, 128, 128)),\n        channels=128,\n        dropout_ratio=0.5,\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        act_cfg=dict(type='ReLU'),\n        loss_decode=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            class_weight=None,  # should be modified with dataset\n            loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(),\n    test_cfg=dict(mode='slide'))\n"
  },
  {
    "path": "configs/_base_/models/smoke.py",
    "content": "model = dict(\n    type='SMOKEMono3D',\n    backbone=dict(\n        type='DLANet',\n        depth=34,\n        in_channels=3,\n        norm_cfg=dict(type='GN', num_groups=32),\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth'\n        )),\n    neck=dict(\n        type='DLANeck',\n        in_channels=[16, 32, 64, 128, 256, 512],\n        start_level=2,\n        end_level=5,\n        norm_cfg=dict(type='GN', num_groups=32)),\n    bbox_head=dict(\n        type='SMOKEMono3DHead',\n        num_classes=3,\n        in_channels=64,\n        dim_channel=[3, 4, 5],\n        ori_channel=[6, 7],\n        stacked_convs=0,\n        feat_channels=64,\n        use_direction_classifier=False,\n        diff_rad_by_sin=False,\n        pred_attrs=False,\n        pred_velo=False,\n        dir_offset=0,\n        strides=None,\n        group_reg_dims=(8, ),\n        cls_branch=(256, ),\n        reg_branch=((256, ), ),\n        num_attrs=0,\n        bbox_code_size=7,\n        dir_branch=(),\n        attr_branch=(),\n        bbox_coder=dict(\n            type='SMOKECoder',\n            base_depth=(28.01, 16.32),\n            base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63,\n                                                                1.53)),\n            code_size=7),\n        loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_attr=None,\n        conv_bias=True,\n        dcn_on_last_conv=False),\n    train_cfg=None,\n    test_cfg=dict(topK=100, local_maximum_kernel=3, max_per_img=100))\n"
  },
  {
    "path": "configs/_base_/models/votenet.py",
    "content": "model = dict(\n    type='VoteNet',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=4,\n        num_points=(2048, 1024, 512, 256),\n        radius=(0.2, 0.4, 0.8, 1.2),\n        num_samples=(64, 32, 16, 16),\n        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                     (128, 128, 256)),\n        fp_channels=((256, 256), (256, 256)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)),\n    bbox_head=dict(\n        type='VoteHead',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=3,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=256,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        pred_layer_cfg=dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.2, 0.8],\n            reduction='sum',\n            loss_weight=5.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l2',\n            reduction='sum',\n            loss_src_weight=10.0,\n            loss_dst_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),\n    test_cfg=dict(\n        sample_mod='seed',\n        nms_thr=0.25,\n        score_thr=0.05,\n        per_class_proposal=True))\n"
  },
  {
    "path": "configs/_base_/schedules/cosine.py",
    "content": "# This schedule is mainly used by models with dynamic voxelization\n# optimizer\nlr = 0.003  # max learning rate\noptimizer = dict(\n    type='AdamW',\n    lr=lr,\n    betas=(0.95, 0.99),  # the momentum is change during training\n    weight_decay=0.001)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\n\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 10,\n    min_lr_ratio=1e-5)\n\nmomentum_config = None\n\nrunner = dict(type='EpochBasedRunner', max_epochs=40)\n"
  },
  {
    "path": "configs/_base_/schedules/cyclic_20e.py",
    "content": "# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 20. Please change the interval accordingly if you do not\n# use a default schedule.\n# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "configs/_base_/schedules/cyclic_40e.py",
    "content": "# The schedule is usually used by models trained on KITTI dataset\n\n# The learning rate set in the cyclic schedule is the initial learning rate\n# rather than the max learning rate. Since the target_ratio is (10, 1e-4),\n# the learning rate will change from 0.0018 to 0.018, than go to 0.0018*1e-4\nlr = 0.0018\n# The optimizer follows the setting in SECOND.Pytorch, but here we use\n# the official AdamW optimizer implemented by PyTorch.\noptimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\n# We use cyclic learning rate and momentum schedule following SECOND.Pytorch\n# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69  # noqa\n# We implement them in mmcv, for more details, please refer to\n# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327  # noqa\n# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130  # noqa\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\n# Although the max_epochs is 40, this schedule is usually used we\n# RepeatDataset with repeat ratio N, thus the actual max epoch\n# number could be Nx40\nrunner = dict(type='EpochBasedRunner', max_epochs=40)\n"
  },
  {
    "path": "configs/_base_/schedules/mmdet_schedule_1x.py",
    "content": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "configs/_base_/schedules/schedule_2x.py",
    "content": "# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 1000,\n    step=[20, 23])\nmomentum_config = None\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "configs/_base_/schedules/schedule_3x.py",
    "content": "# optimizer\n# This schedule is mainly used by models on indoor dataset,\n# e.g., VoteNet on SUNRGBD and ScanNet\nlr = 0.008  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\nlr_config = dict(policy='step', warmup=None, step=[24, 32])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "configs/_base_/schedules/seg_cosine_100e.py",
    "content": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=100)\n"
  },
  {
    "path": "configs/_base_/schedules/seg_cosine_150e.py",
    "content": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)\nmomentum_config = None\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=150)\n"
  },
  {
    "path": "configs/_base_/schedules/seg_cosine_200e.py",
    "content": "# optimizer\n# This schedule is mainly used on ScanNet dataset in segmentation task\noptimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)\nmomentum_config = None\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=200)\n"
  },
  {
    "path": "configs/_base_/schedules/seg_cosine_50e.py",
    "content": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)\nmomentum_config = None\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=50)\n"
  },
  {
    "path": "configs/bev_next/bev_planner.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 4\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 12\ncheckpoint_epoch_interval = 1\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\n# bev configs\nroi_size = (102.4, 102.4)\nbev_h = 128\nbev_w = 128\npoint_cloud_range = [-roi_size[0]/2, -roi_size[1]/2, -5, roi_size[0]/2, roi_size[1]/2, 3]\n\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (256, 704),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.38, 0.55),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-0, 0),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.,\n    flip_dy_ratio=0.)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\n### occupancy config\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n###\n\nmap_classes = ['divider', 'ped_crossing', 'boundary']\nmap_num_vec = 100\nmap_fixed_ptsnum_per_gt_line = 20 # now only support fixed_pts > 0\nmap_fixed_ptsnum_per_pred_line = 20\nmap_eval_use_same_gt_sample_num_flag = True\nmap_num_classes = len(map_classes)\n\nembed_dims = 256\nnum_feat_levels = 1\nnorm_cfg = dict(type='BN2d')\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n\nnum_points = 20\npermute = True\nwith_ego_as_agent = False\n###\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=False,\n    fix_void=fix_void,\n    do_history = 
do_history,\n    history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    fuse_history_bev=True,\n    use_grid_mask=True,\n    align_prev_bev=False,\n    img_backbone=dict(       \n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN2d', requires_grad=False),\n        norm_eval=True,\n        with_cp=False,\n        # pretrained='torchvision://resnet50',\n        init_cfg=dict(\n            type='Pretrained', checkpoint=\"ckpts/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth\",\n            prefix='backbone.'),\n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n        context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=1.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=None,\n    pts_bbox_head=None,\n    map_head=None,\n    motion_head=None,\n    planner_head=dict(\n        type='NaivePlannerHead'\n    ),\n    # model training and testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=None),\n    )\n)\n\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(\n        type='LoadVectorMap2',\n        data_root = data_root,\n        point_cloud_range =point_cloud_range,\n        map_classes = ['divider', 'ped_crossing', 'boundary'],\n        map_num_vec = 100,\n        map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n        map_eval_use_same_gt_sample_num_flag = True,\n        map_num_classes = 3,\n    ),   \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='LoadGTMotion'),\n    dict(type='LoadGTPlaner'),\n    
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',  'map_gt_labels_3d', 'map_gt_bboxes_3d'\n                               ] + ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+\n                               ['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']\n                               )\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs',\n            # img_corruptions='sun', \n            data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                \n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='LoadVectorMap',\n                data_root = data_root,\n                point_cloud_range =point_cloud_range,\n                map_classes = ['divider', 'ped_crossing', 'boundary'],\n                map_num_vec = 100,\n                map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n                map_eval_use_same_gt_sample_num_flag = True,\n                map_num_classes = 3,\n            ),   \n            dict(type='LoadGTPlaner'),\n            dict(type='LoadGTMotion',  with_ego_as_agent=with_ego_as_agent),   \n            dict(type='LoadFutBoxInfo'),\n            dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n            dict(type='ObjectNameFilter', classes=class_names),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'map_gt_bboxes_3d', 'map_gt_labels_3d']+\n            ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']+\n            ['gt_fut_segmentations', 'gt_fut_segmentations_plus', 'fut_boxes_in_cur_ego_list']\n            )\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    map_ann_file=data_root + 'nuscenes_map_infos_102x102_val.pkl',\n    map_eval_cfg=dict(\n        region = (102.4, 102.4) # (H, W)\n    ),\n    load_fut_bbox_info=True,\n    sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 
'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    workers_per_gpu=2,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        test_mode=False,\n        use_valid_flag=True,\n        \n        sequences_split_num=train_sequences_split_num,\n        filter_empty_gt=filter_empty_gt,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['train', 'val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=1e-4, # bs 8: 2e-4 || bs 16: 4e-4\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=2*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter=0,\n    ),\n    # dict(\n    #     type='TimerCP', \n    # )\n]\n# load_from = None\n# resume_from = None"
  },
  {
    "path": "configs/bev_next/bev_planner_plus.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 4\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 12\ncheckpoint_epoch_interval = 1\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\n# bev configs\nroi_size = (102.4, 102.4)\nbev_h = 128\nbev_w = 128\npoint_cloud_range = [-roi_size[0]/2, -roi_size[1]/2, -5, roi_size[0]/2, roi_size[1]/2, 3]\n\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (256, 704),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.38, 0.55),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-0, 0),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.,\n    flip_dy_ratio=0.)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\n### occupancy config\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n###\n\nmap_classes = ['divider', 'ped_crossing', 'boundary']\nmap_num_vec = 100\nmap_fixed_ptsnum_per_gt_line = 20 # now only support fixed_pts > 0\nmap_fixed_ptsnum_per_pred_line = 20\nmap_eval_use_same_gt_sample_num_flag = True\nmap_num_classes = len(map_classes)\n\nembed_dims = 256\nnum_feat_levels = 1\nnorm_cfg = dict(type='BN2d')\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n\nnum_points = 20\npermute = True\nwith_ego_as_agent = False\n###\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=False,\n    fix_void=fix_void,\n    do_history = do_history,\n 
   history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    fuse_history_bev=True,\n    use_grid_mask=True,\n    align_prev_bev=False,\n    with_ego_status=True,\n    img_backbone=dict(      \n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN2d', requires_grad=False),\n        norm_eval=True,\n        with_cp=False,\n        # pretrained='torchvision://resnet50',\n        init_cfg=dict(\n            type='Pretrained', checkpoint=\"ckpts/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth\",\n            prefix='backbone.'),   \n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n        context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=1.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=None,\n    pts_bbox_head=None,\n    map_head=None,\n    motion_head=None,\n    planner_head=dict(\n        type='NaivePlannerHead'\n    ),\n    # model training and testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=None),\n    )\n)\n\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(\n        type='LoadVectorMap2',\n        data_root = data_root,\n        point_cloud_range =point_cloud_range,\n        map_classes = ['divider', 'ped_crossing', 'boundary'],\n        map_num_vec = 100,\n        map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n        map_eval_use_same_gt_sample_num_flag = True,\n        map_num_classes = 3,\n    ),   \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='LoadGTMotion'),\n    
dict(type='LoadGTPlaner'),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',  'map_gt_labels_3d', 'map_gt_bboxes_3d'\n                               ] + ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+['can_bus_info']+\n                               ['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']\n                               )\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs',\n            # img_corruptions='sun', \n            data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                \n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='LoadVectorMap',\n                data_root = data_root,\n                point_cloud_range =point_cloud_range,\n                map_classes = ['divider', 'ped_crossing', 'boundary'],\n                map_num_vec = 100,\n                map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n                map_eval_use_same_gt_sample_num_flag = True,\n                map_num_classes = 3,\n            ),   \n            dict(type='LoadGTPlaner'),\n            dict(type='LoadGTMotion',  with_ego_as_agent=with_ego_as_agent),   \n            dict(type='LoadFutBoxInfo'),\n            dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n            dict(type='ObjectNameFilter', classes=class_names),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'map_gt_bboxes_3d', 'map_gt_labels_3d']+\n            ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']+\n            ['gt_fut_segmentations', 'gt_fut_segmentations_plus', 'fut_boxes_in_cur_ego_list']+ ['can_bus_info']\n            )\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    map_ann_file=data_root + 'nuscenes_map_infos_102x102_val.pkl',\n    map_eval_cfg=dict(\n        region = (102.4, 102.4) # (H, W)\n    ),\n    load_fut_bbox_info=True,\n    
sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    workers_per_gpu=2,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        test_mode=False,\n        use_valid_flag=True,\n        \n        sequences_split_num=train_sequences_split_num,\n        filter_empty_gt=filter_empty_gt,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['train', 'val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=1e-4, # bs 8: 2e-4 || bs 16: 4e-4\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=2*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter=0,\n    ),\n    # dict(\n    #     type='TimerCP',\n    # )\n]\n# load_from = None\n# resume_from = None"
  },
  {
    "path": "configs/bev_next/bev_planner_plus_plus.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 4\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 12\ncheckpoint_epoch_interval = 1\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\n# bev configs\nroi_size = (102.4, 102.4)\nbev_h = 128\nbev_w = 128\npoint_cloud_range = [-roi_size[0]/2, -roi_size[1]/2, -5, roi_size[0]/2, roi_size[1]/2, 3]\n\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (256, 704),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.38, 0.55),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-0, 0),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.,\n    flip_dy_ratio=0.)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\n### occupancy config\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n###\n\nmap_classes = ['divider', 'ped_crossing', 'boundary']\nmap_num_vec = 100\nmap_fixed_ptsnum_per_gt_line = 20 # now only support fixed_pts > 0\nmap_fixed_ptsnum_per_pred_line = 20\nmap_eval_use_same_gt_sample_num_flag = True\nmap_num_classes = len(map_classes)\n\nembed_dims = 256\nnum_feat_levels = 1\nnorm_cfg = dict(type='BN2d')\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n\nnum_points = 20\npermute = True\nwith_ego_as_agent = False\n###\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=False,\n    fix_void=fix_void,\n    do_history = 
do_history,\n    history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    fuse_history_bev=True,\n    use_grid_mask=True,\n    align_prev_bev=False,\n    with_ego_status=True,\n    img_backbone=dict(  \n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN2d', requires_grad=False),\n        norm_eval=True,\n        with_cp=False,\n        # pretrained='torchvision://resnet50',\n        init_cfg=dict(\n            type='Pretrained', checkpoint=\"ckpts/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth\",\n            prefix='backbone.'),   \n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n        context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=1.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=None,\n    pts_bbox_head=None,\n    map_head=None,\n    motion_head=None,\n    planner_head=dict(\n        type='NaivePlannerHead',\n        with_ego_status=True,\n    ),\n    # model training and testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=None),\n    )\n)\n\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(\n        type='LoadVectorMap2',\n        data_root = data_root,\n        point_cloud_range =point_cloud_range,\n        map_classes = ['divider', 'ped_crossing', 'boundary'],\n        map_num_vec = 100,\n        map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n        map_eval_use_same_gt_sample_num_flag = True,\n        map_num_classes = 3,\n    ),   \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    
dict(type='LoadGTMotion'),\n    dict(type='LoadGTPlaner'),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',  'map_gt_labels_3d', 'map_gt_bboxes_3d'\n                               ] + ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+ ['can_bus_info']+\n                               ['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']\n                               )\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs',\n            # img_corruptions='sun', \n            data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                \n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='LoadVectorMap',\n                data_root = data_root,\n                point_cloud_range =point_cloud_range,\n                map_classes = ['divider', 'ped_crossing', 'boundary'],\n                map_num_vec = 100,\n                map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n                map_eval_use_same_gt_sample_num_flag = True,\n                map_num_classes = 3,\n            ),   \n            dict(type='LoadGTPlaner'),\n            dict(type='LoadGTMotion',  with_ego_as_agent=with_ego_as_agent),   \n            dict(type='LoadFutBoxInfo'),\n            dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n            dict(type='ObjectNameFilter', classes=class_names),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'map_gt_bboxes_3d', 'map_gt_labels_3d']+\n            ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']+\n            ['gt_fut_segmentations', 'gt_fut_segmentations_plus', 'fut_boxes_in_cur_ego_list'] + ['can_bus_info']\n            )\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    map_ann_file=data_root + 'nuscenes_map_infos_102x102_val.pkl',\n    map_eval_cfg=dict(\n        region = (102.4, 102.4) # (H, W)\n    ),\n    
load_fut_bbox_info=True,\n    sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    workers_per_gpu=2,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        test_mode=False,\n        use_valid_flag=True,\n        sequences_split_num=train_sequences_split_num,\n        filter_empty_gt=filter_empty_gt,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['train', 'val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=1e-4, # bs 8: 2e-4 || bs 16: 4e-4\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=2*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter=0,\n    ),\n    # dict(\n    #     type='TimerCP',\n    # )\n]\n# load_from = None\n# resume_from = None"
  },
  {
    "path": "configs/bev_next/bev_planner_w_map.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 4\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 12\ncheckpoint_epoch_interval = 1\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\n# bev configs\nroi_size = (102.4, 102.4)\nbev_h = 128\nbev_w = 128\npoint_cloud_range = [-roi_size[0]/2, -roi_size[1]/2, -5, roi_size[0]/2, roi_size[1]/2, 3]\n\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (256, 704),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.38, 0.55),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-0, 0),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.,\n    flip_dy_ratio=0.)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\n### occupancy config\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n###\n\nmap_classes = ['divider', 'ped_crossing', 'boundary']\nmap_num_vec = 100\nmap_fixed_ptsnum_per_gt_line = 20 # now only support fixed_pts > 0\nmap_fixed_ptsnum_per_pred_line = 20\nmap_eval_use_same_gt_sample_num_flag = True\nmap_num_classes = len(map_classes)\n\nembed_dims = 256\nnum_feat_levels = 1\nnorm_cfg = dict(type='BN2d')\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n\nnum_points = 20\npermute = True\nwith_ego_as_agent = False\n###\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=False,\n    fix_void=fix_void,\n    do_history = 
do_history,\n    history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    fuse_history_bev=True,\n    use_grid_mask=True,\n    align_prev_bev=False,\n    img_backbone=dict(       \n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN2d', requires_grad=False),\n        norm_eval=True,\n        with_cp=True,\n        pretrained='torchvision://resnet50',\n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n        context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=1.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=None,\n    pts_bbox_head=None,\n    map_head=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        streaming_cfg=dict(\n            streaming=False,\n            batch_size=samples_per_gpu,\n            topk=int(num_queries*(1/3)),\n            trans_loss_weight=0.1,\n        ),\n        # streaming_cfg=None,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                    ],\n                    
ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=0.5\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=5.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=0.5),\n                    reg_cost=dict(type='LinesL1Cost', weight=5.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    motion_head=None,\n    planner_head=dict(\n        type='NaivePlannerHead',\n        use_map_info=True,\n        loss_plan_reg=dict(type='L1Loss', loss_weight=20.0),\n        loss_plan_col=dict(type='PlanCollisionLoss', loss_weight=20.0),\n    ),\n    # model training and testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=None),\n    )\n)\n\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(\n        type='LoadVectorMap2',\n        data_root = data_root,\n        point_cloud_range =point_cloud_range,\n        map_classes = ['divider', 'ped_crossing', 'boundary'],\n        map_num_vec = 100,\n        map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n        map_eval_use_same_gt_sample_num_flag = True,\n        map_num_classes = 3,\n    ),   \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='LoadGTMotion'),\n    dict(type='LoadGTPlaner'),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    
dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',  'map_gt_labels_3d', 'map_gt_bboxes_3d'\n                               ] + ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+\n                               ['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']\n                               )\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs',\n            # img_corruptions='sun', \n            data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                \n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='LoadVectorMap',\n                data_root = data_root,\n                point_cloud_range =point_cloud_range,\n                map_classes = ['divider', 'ped_crossing', 'boundary'],\n                map_num_vec = 100,\n                map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n                map_eval_use_same_gt_sample_num_flag = True,\n                map_num_classes = 3,\n            ),   \n            dict(type='LoadGTPlaner'),\n            dict(type='LoadGTMotion',  with_ego_as_agent=with_ego_as_agent),   \n            dict(type='LoadFutBoxInfo'),\n            dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n            dict(type='ObjectNameFilter', classes=class_names),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'map_gt_bboxes_3d', 'map_gt_labels_3d']+\n            ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']+\n            ['gt_fut_segmentations', 'gt_fut_segmentations_plus', 'fut_boxes_in_cur_ego_list']    \n            )\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    map_ann_file=data_root + 'nuscenes_map_infos_102x102_val.pkl',\n    map_eval_cfg=dict(\n        region = (102.4, 102.4) # (H, W)\n    ),\n    load_fut_bbox_info=True,\n    sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    workers_per_gpu=2,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        
test_mode=False,\n        use_valid_flag=True,\n        \n        sequences_split_num=train_sequences_split_num,\n        filter_empty_gt=filter_empty_gt,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['train', 'val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=1e-4, # bs 8: 2e-4 || bs 16: 4e-4\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=2*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter=0,\n    ),\n    # dict(\n    #     type='TimerCP',\n    # )\n]\n# load_from = None\n# resume_from = None\n"
  },
  {
    "path": "configs/bev_next/det_pretrain_320x800_vov_36ep.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 2\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 36\ncheckpoint_epoch_interval = 2\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (320, 800),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.47, 0.625),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-22.5, 22.5),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.5,\n    flip_dy_ratio=0.5)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=True,\n    fix_void=fix_void,\n    do_history = do_history,\n    history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    use_grid_mask=True,\n    with_ego_status=False,\n    img_backbone=dict(\n        type='VoVNetCP', ###use checkpoint to save memory\n        spec_name='V-99-eSE',\n        norm_eval=True,\n        frozen_stages=-1,\n        input_ch=3,\n        out_features=('stage4','stage5',)),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[768, 1024],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n     
   context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=3.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=dict(\n        type='YOLOXHeadCustom',\n        num_classes=10,\n        in_channels=80,\n        strides=[16],\n        train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),\n        test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)),\n    ),\n    pts_bbox_head=dict(\n        type='SparseHead4BEV',\n        num_classes=10,\n        in_channels=_dim_,\n        num_query=300,\n        memory_len=512,\n        topk_proposals=128,\n        num_propagated=128,\n        scalar=10, ##noise groups\n        noise_scale = 1.0, \n        dn_weight= 1.0, ##dn loss weight\n        split = 0.75, ###positive rate\n        with_dn=True,\n        with_ego_pos=True,\n        match_with_velo=False,\n        code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n        transformer=dict(\n            type='Detr3DTransformer',\n            decoder=dict(\n                type='Detr3DTransformerDecoder',\n                embed_dims=_dim_,\n                num_layers=6,\n                transformerlayers=dict(\n                    type='Detr3DTemporalDecoderLayer',\n                    batch_first=True,\n                    attn_cfgs=[\n                        dict(\n                            type='SparseBEVSelfAttention',\n                            embed_dims=_dim_,\n                            num_heads=8,\n                            dropout=0.0),\n                        dict(\n                            type='DeformableFeatureAggregationCuda', \n                            embed_dims=_dim_,\n                            num_groups=8,\n                            num_levels=1,\n                            # num_cams=6,\n                            dropout=0.0,\n                            num_pts=13,\n                            bias=2.),\n                        ],\n                    feedforward_channels=2048,\n                    ffn_dropout=0.0,\n                    with_cp=True,  ###use checkpoint to save memory\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                     'ffn', 'norm')),\n            )),\n        bbox_coder=dict(\n            type='NMSFreeCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            pc_range=point_cloud_range,\n            max_num=300,\n            num_classes=10), \n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=2.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n        loss_iou=dict(type='GIoULoss', loss_weight=0.0),),\n    map_head=None,\n        # model training and 
testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=dict(\n                type='HungarianAssigner3D',\n                cls_cost=dict(type='FocalLossCost', weight=2.0),\n                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. \n                pc_range=point_cloud_range),\n            ),\n        )\n)\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',\n                               ] + ['can_bus_info'])\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs', data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d'] + ['can_bus_info'])\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    
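# With num_gpus = 8 and samples_per_gpu = 2 defined above, the effective batch size is\n    # 16, which presumably corresponds to the 'bs 16: 4e-4' note on the learning rate below.\n    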
workers_per_gpu=6,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        use_valid_flag=True,\n        modality=input_modality,\n        img_info_prototype='bevdet',\n        sequences_split_num=train_sequences_split_num,\n        use_sequence_group_flag=True,\n        filter_empty_gt=filter_empty_gt,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=4e-4, # bs 8: 2e-4 || bs 16: 4e-4\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=checkpoint_epoch_interval*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter= num_iters_per_epoch*2,\n    ),\n    dict(\n        type='TimerCP',\n    )\n]\nload_from = 'ckpts/fcos3d_vovnet_imgbackbone-remapped.pth'"
  },
  {
    "path": "configs/bev_next/det_pretrain_640x1600_vov_36ep.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 2\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 36\ncheckpoint_epoch_interval = 2\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (640, 1600),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.94, 1.25),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-22.5, 22.5),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.5,\n    flip_dy_ratio=0.5)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=True,\n    fix_void=fix_void,\n    do_history = do_history,\n    history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    use_grid_mask=True,\n    with_ego_status=False,\n    img_backbone=dict(\n        type='VoVNetCP', ###use checkpoint to save memory\n        spec_name='V-99-eSE',\n        norm_eval=True,\n        frozen_stages=-1,\n        input_ch=3,\n        out_features=('stage4','stage5',)),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[768, 1024],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n     
   context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=3.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=dict(\n        type='YOLOXHeadCustom',\n        num_classes=10,\n        in_channels=80,\n        strides=[16],\n        train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),\n        test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)),\n    ),\n    pts_bbox_head=dict(\n        type='SparseHead4BEV',\n        num_classes=10,\n        in_channels=_dim_,\n        num_query=300,\n        memory_len=512,\n        topk_proposals=128,\n        num_propagated=128,\n        scalar=10, ##noise groups\n        noise_scale = 1.0, \n        dn_weight= 1.0, ##dn loss weight\n        split = 0.75, ###positive rate\n        with_dn=True,\n        with_ego_pos=True,\n        match_with_velo=False,\n        code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n        transformer=dict(\n            type='Detr3DTransformer',\n            decoder=dict(\n                type='Detr3DTransformerDecoder',\n                embed_dims=_dim_,\n                num_layers=6,\n                transformerlayers=dict(\n                    type='Detr3DTemporalDecoderLayer',\n                    batch_first=True,\n                    attn_cfgs=[\n                        dict(\n                            type='SparseBEVSelfAttention',\n                            embed_dims=_dim_,\n                            num_heads=8,\n                            dropout=0.0),\n                        dict(\n                            type='DeformableFeatureAggregationCuda', \n                            embed_dims=_dim_,\n                            num_groups=8,\n                            num_levels=1,\n                            # num_cams=6,\n                            dropout=0.0,\n                            num_pts=13,\n                            bias=2.),\n                        ],\n                    feedforward_channels=2048,\n                    ffn_dropout=0.0,\n                    with_cp=True,  ###use checkpoint to save memory\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                     'ffn', 'norm')),\n            )),\n        bbox_coder=dict(\n            type='NMSFreeCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            pc_range=point_cloud_range,\n            max_num=300,\n            num_classes=10), \n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=2.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n        loss_iou=dict(type='GIoULoss', loss_weight=0.0),),\n    map_head=None,\n        # model training and 
testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=dict(\n                type='HungarianAssigner3D',\n                cls_cost=dict(type='FocalLossCost', weight=2.0),\n                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. \n                pc_range=point_cloud_range),\n            ),\n        )\n)\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',\n                               ] + ['can_bus_info'])\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs', data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d'] + ['can_bus_info'])\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    
workers_per_gpu=6,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        use_valid_flag=True,\n        modality=input_modality,\n        img_info_prototype='bevdet',\n        sequences_split_num=train_sequences_split_num,\n        use_sequence_group_flag=True,\n        filter_empty_gt=filter_empty_gt,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=4e-4, # bs 8: 2e-4 || bs 16: 4e-4\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=checkpoint_epoch_interval*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter= num_iters_per_epoch*2,\n    ),\n    dict(\n        type='TimerCP',\n    )\n]\nload_from = 'ckpts/fcos3d_vovnet_imgbackbone-remapped.pth'"
  },
  {
    "path": "configs/bev_next/map_pretrain.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\n# we follow the online training settings  from solofusion\nnum_gpus = 8\nsamples_per_gpu = 4\nnum_iters_per_epoch = int(28130 // (num_gpus * samples_per_gpu) )\nnum_epochs = 60\ncheckpoint_epoch_interval = 12\nuse_custom_eval_hook=True\n\n# Each nuScenes sequence is ~40 keyframes long. Our training procedure samples\n# sequences first, then loads frames from the sampled sequence in order \n# starting from the first frame. This reduces training step-to-step diversity,\n# lowering performance. To increase diversity, we split each training sequence\n# in half to ~20 keyframes, and sample these shorter sequences during training.\n# During testing, we do not do this splitting.\ntrain_sequences_split_num = 4\ntest_sequences_split_num = 1\n\n# By default, 3D detection datasets randomly choose another sample if there is\n# no GT object in the current sample. This does not make sense when doing\n# sequential sampling of frames, so we disable it.\nfilter_empty_gt = False\n\n# Long-Term Fusion Parameters\ndo_history = False\nhistory_cat_num = 4\nhistory_cat_conv_out_channels = 160\n\n_base_ = ['../_base_/datasets/nus-3d.py', '../_base_/default_runtime.py']\n# Global\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\n# bev configs\nroi_size = (102.4, 102.4)\nbev_h = 128\nbev_w = 128\npoint_cloud_range = [-roi_size[0]/2, -roi_size[1]/2, -5, roi_size[0]/2, roi_size[1]/2, 3]\n\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ndata_config = {\n    'cams': [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    'Ncams':\n    6,\n    'input_size': (256, 704),\n    'src_size': (900, 1600),\n    # Augmentation\n    'resize': (0.38, 0.55),\n    'rot': (0, 0),\n    'flip': True,\n    'crop_h': (0.0, 0.0),\n    'resize_test': 0.00,\n}\nbda_aug_conf = dict(\n    rot_lim=(-0, 0),\n    scale_lim=(1., 1.),\n    flip_dx_ratio=0.,\n    flip_dy_ratio=0.)\nvoxel_size = [0.2, 0.2, 8]\nuse_checkpoint = False\nsync_bn = True\n# Model\ngrid_config = {\n    'x': [-51.2, 51.2, 0.8],\n    'y': [-51.2, 51.2, 0.8],\n    'z': [-5, 3, 8],\n    'depth': [1.0, 60.0, 0.5],\n}\ndepth_categories = 118 #(grid_config['depth'][1]-grid_config['depth'][0])//grid_config['depth'][2]\n\nnumC_Trans=80\n_dim_ = 256\n\n### occupancy config\nempty_idx = 18  # noise 0-->255\nnum_cls = 19  # 0 others, 1-16 obj, 17 free\nfix_void = num_cls == 19\n###\nmap_classes = ['divider', 'ped_crossing', 'boundary']\nmap_num_vec = 100\nmap_fixed_ptsnum_per_gt_line = 20 # now only support fixed_pts > 0\nmap_fixed_ptsnum_per_pred_line = 20\nmap_eval_use_same_gt_sample_num_flag = True\nmap_num_classes = len(map_classes)\n\nembed_dims = 256\nnum_feat_levels = 1\nnorm_cfg = dict(type='BN2d')\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n\nnum_points = 20\npermute = True\nwith_ego_as_agent = False\n###\nmodel = dict(\n    type='BEVPlanner',\n    use_depth_supervision=True,\n    fix_void=fix_void,\n    do_history = do_history,\n   
 history_cat_num=history_cat_num,\n    single_bev_num_channels=numC_Trans,\n    fuse_history_bev=True,\n    use_grid_mask=True,\n    align_prev_bev=False,\n    img_backbone=dict(\n        init_cfg=dict(\n            type='Pretrained', checkpoint=\"ckpts/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth\",\n            prefix='backbone.'),       \n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN2d', requires_grad=False),\n        norm_eval=True,\n        with_cp=False,\n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=_dim_,\n        num_outs=1,\n        start_level=0,\n        with_cp=use_checkpoint,\n        out_ids=[0]),\n    depth_net=dict(\n        type='CM_DepthNet', # camera-aware depth net\n        in_channels=_dim_,\n        context_channels=numC_Trans,\n        downsample=16,\n        grid_config=grid_config,\n        depth_channels=depth_categories,\n        with_cp=use_checkpoint,\n        loss_depth_weight=1.,\n        aspp_mid_channels=96,\n        use_dcn=False,\n    ),\n    forward_projection=dict(\n        type='LSSViewTransformerFunction',\n        grid_config=grid_config,\n        input_size=data_config['input_size'],\n        downsample=16),\n    frpn=None,\n    backward_projection=None,\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet',\n        numC_input=numC_Trans,\n        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS',\n        in_channels=numC_Trans * 8 + numC_Trans * 2,\n        out_channels=256),\n    occupancy_head=None,\n    img_det_2d_head=None,\n    pts_bbox_head=None,\n    map_head=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        streaming_cfg=dict(\n            streaming=False,\n            batch_size=samples_per_gpu,\n            topk=int(num_queries*(1/3)),\n            trans_loss_weight=0.1,\n        ),\n        # streaming_cfg=None,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n   
                         dropout=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=0.5\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=5.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=0.5),\n                    reg_cost=dict(type='LinesL1Cost', weight=5.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    motion_head=None,\n    planner_head=None,\n    # model training and testing settings\n    train_cfg=dict(pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            out_size_factor=4,\n            assigner=None),\n    )\n)\n\n\n# Data\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\noccupancy_path = '/mount/data/occupancy_cvpr2023/gts'\nnormalize_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        normalize_cfg=normalize_cfg,\n        data_config=data_config),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=bda_aug_conf,\n        with_2d_bbox=True,\n        classes=class_names),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),  \n    dict(\n        type='LoadVectorMap2',\n        data_root = data_root,\n        point_cloud_range =point_cloud_range,\n        map_classes = ['divider', 'ped_crossing', 'boundary'],\n        map_num_vec = 100,\n        map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n        map_eval_use_same_gt_sample_num_flag = True,\n        map_num_classes = 3,\n    ),   \n    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),\n    dict(type='LoadGTMotion'),\n    dict(type='LoadGTPlaner'),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    # dict(type='VisualInputsAndGT'),\n    # dict(type='LoadOccupancy', ignore_nonvisible=True, fix_void=fix_void, occupancy_path=occupancy_path),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D', keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_depth', 
'gt_bboxes_2d', 'gt_labels_2d', 'centers2d', 'depths2d',  'map_gt_labels_3d', 'map_gt_bboxes_3d'\n                               ] + ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+\n                               ['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']\n                               )\n]\n\ntest_pipeline = [\n    dict(\n        type='CustomDistMultiScaleFlipAug3D',\n        tta=False,\n        transforms=[\n            dict(type='PrepareImageInputs',\n            # img_corruptions='sun', \n            data_config=data_config, normalize_cfg=normalize_cfg),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=bda_aug_conf,\n                classes=class_names,\n                with_2d_bbox=True,\n                \n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=file_client_args),\n            dict(\n                type='LoadVectorMap',\n                data_root = data_root,\n                point_cloud_range =point_cloud_range,\n                map_classes = ['divider', 'ped_crossing', 'boundary'],\n                map_num_vec = 100,\n                map_fixed_ptsnum_per_line = 20, # now only support fixed_pts > 0,\n                map_eval_use_same_gt_sample_num_flag = True,\n                map_num_classes = 3,\n            ),   \n            dict(type='LoadGTPlaner'),\n            dict(type='LoadGTMotion',  with_ego_as_agent=with_ego_as_agent),   \n            dict(type='LoadFutBoxInfo'),\n            dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n            dict(type='ObjectNameFilter', classes=class_names),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d', 'map_gt_bboxes_3d', 'map_gt_labels_3d']+\n            ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask']+['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks']+\n            ['gt_fut_segmentations']\n            \n            )\n            ]\n        )\n]\n\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\n\nshare_data_config = dict(\n    type=dataset_type,\n    classes=class_names,\n    modality=input_modality,\n    img_info_prototype='bevdet',\n    occupancy_path=occupancy_path,\n    data_root=data_root,\n    use_sequence_group_flag=True,\n)\n\ntest_data_config = dict(\n    pipeline=test_pipeline,\n    map_ann_file=data_root + 'nuscenes_map_infos_102x102_val.pkl',\n    map_eval_cfg=dict(\n        region = (102.4, 102.4) # (H, W)\n    ),\n    load_fut_bbox_info=True,\n    sequences_split_num=test_sequences_split_num,\n    ann_file=data_root + 'bev-next-nuscenes_infos_val.pkl')\n\ndata = dict(\n    samples_per_gpu=samples_per_gpu,\n    workers_per_gpu=2,\n    test_dataloader=dict(runner_type='IterBasedRunnerEval'),\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'bev-next-nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        test_mode=False,\n        use_valid_flag=True,\n        \n        sequences_split_num=train_sequences_split_num,\n        filter_empty_gt=filter_empty_gt,\n 
       # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=test_data_config,\n    test=test_data_config)\n\nfor key in ['train', 'val', 'test']:\n    data[key].update(share_data_config)\n\n\noptimizer = dict(\n    type='AdamW', \n    lr=1e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1), \n        }),\n    weight_decay=0.01)\n \noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=1e-3,\n    )\n\nrunner = dict(type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)\ncheckpoint_config = dict(\n    interval=checkpoint_epoch_interval * num_iters_per_epoch)\nevaluation = dict(\n    interval=num_epochs * num_iters_per_epoch, pipeline=test_pipeline)\n\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\ncustom_hooks = [\n    dict(\n        type='MEGVIIEMAHook',\n        init_updates=10560,\n        priority='NORMAL',\n        interval=checkpoint_epoch_interval*num_iters_per_epoch,\n    ),\n    dict(\n        type='SequentialControlHook',\n        temporal_start_iter=0,\n    ),\n    dict(\n        type='TimerCP',\n    )\n]\n# load_from = None\n# resume_from = None\n"
  },
  {
    "path": "mmdet3d/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\n\nimport mmdet\nimport mmseg\nfrom .version import __version__, short_version\n\n\ndef digit_version(version_str):\n    digit_version = []\n    for x in version_str.split('.'):\n        if x.isdigit():\n            digit_version.append(int(x))\n        elif x.find('rc') != -1:\n            patch_version = x.split('rc')\n            digit_version.append(int(patch_version[0]) - 1)\n            digit_version.append(int(patch_version[1]))\n    return digit_version\n\n\nmmcv_minimum_version = '1.5.2'\nmmcv_maximum_version = '1.7.0'\nmmcv_version = digit_version(mmcv.__version__)\n\n\nassert (mmcv_version >= digit_version(mmcv_minimum_version)\n        and mmcv_version <= digit_version(mmcv_maximum_version)), \\\n    f'MMCV=={mmcv.__version__} is used but incompatible. ' \\\n    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'\n\nmmdet_minimum_version = '2.24.0'\nmmdet_maximum_version = '3.0.0'\nmmdet_version = digit_version(mmdet.__version__)\nassert (mmdet_version >= digit_version(mmdet_minimum_version)\n        and mmdet_version <= digit_version(mmdet_maximum_version)), \\\n    f'MMDET=={mmdet.__version__} is used but incompatible. ' \\\n    f'Please install mmdet>={mmdet_minimum_version}, ' \\\n    f'<={mmdet_maximum_version}.'\n\nmmseg_minimum_version = '0.20.0'\nmmseg_maximum_version = '1.0.0'\nmmseg_version = digit_version(mmseg.__version__)\nassert (mmseg_version >= digit_version(mmseg_minimum_version)\n        and mmseg_version <= digit_version(mmseg_maximum_version)), \\\n    f'MMSEG=={mmseg.__version__} is used but incompatible. ' \\\n    f'Please install mmseg>={mmseg_minimum_version}, ' \\\n    f'<={mmseg_maximum_version}.'\n\n__all__ = ['__version__', 'short_version']\n"
  },
  {
    "path": "mmdet3d/apis/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .inference import (convert_SyncBN, inference_detector,\n                        inference_mono_3d_detector,\n                        inference_multi_modality_detector, inference_segmentor,\n                        init_model, show_result_meshlab)\nfrom .test import single_gpu_test\nfrom .train import init_random_seed, train_model\n\n__all__ = [\n    'inference_detector', 'init_model', 'single_gpu_test',\n    'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN',\n    'train_model', 'inference_multi_modality_detector', 'inference_segmentor',\n    'init_random_seed'\n]\n"
  },
  {
    "path": "mmdet3d/apis/inference.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport re\nfrom copy import deepcopy\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.parallel import collate, scatter\nfrom mmcv.runner import load_checkpoint\n\nfrom mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,\n                          DepthInstance3DBoxes, LiDARInstance3DBoxes,\n                          show_multi_modality_result, show_result,\n                          show_seg_result)\nfrom mmdet3d.core.bbox import get_box_type\nfrom mmdet3d.datasets.pipelines import Compose\nfrom mmdet3d.models import build_model\nfrom mmdet3d.utils import get_root_logger\n\n\ndef convert_SyncBN(config):\n    \"\"\"Convert config's naiveSyncBN to BN.\n\n    Args:\n         config (str or :obj:`mmcv.Config`): Config file path or the config\n            object.\n    \"\"\"\n    if isinstance(config, dict):\n        for item in config:\n            if item == 'norm_cfg':\n                config[item]['type'] = config[item]['type']. \\\n                                    replace('naiveSyncBN', 'BN')\n            else:\n                convert_SyncBN(config[item])\n\n\ndef init_model(config, checkpoint=None, device='cuda:0'):\n    \"\"\"Initialize a model from config file, which could be a 3D detector or a\n    3D segmentor.\n\n    Args:\n        config (str or :obj:`mmcv.Config`): Config file path or the config\n            object.\n        checkpoint (str, optional): Checkpoint path. If left as None, the model\n            will not load any weights.\n        device (str): Device to use.\n\n    Returns:\n        nn.Module: The constructed detector.\n    \"\"\"\n    if isinstance(config, str):\n        config = mmcv.Config.fromfile(config)\n    elif not isinstance(config, mmcv.Config):\n        raise TypeError('config must be a filename or Config object, '\n                        f'but got {type(config)}')\n    config.model.pretrained = None\n    convert_SyncBN(config.model)\n    config.model.train_cfg = None\n    model = build_model(config.model, test_cfg=config.get('test_cfg'))\n    if checkpoint is not None:\n        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')\n        if 'CLASSES' in checkpoint['meta']:\n            model.CLASSES = checkpoint['meta']['CLASSES']\n        else:\n            model.CLASSES = config.class_names\n        if 'PALETTE' in checkpoint['meta']:  # 3D Segmentor\n            model.PALETTE = checkpoint['meta']['PALETTE']\n    model.cfg = config  # save the config in the model for convenience\n    if device != 'cpu':\n        torch.cuda.set_device(device)\n    else:\n        logger = get_root_logger()\n        logger.warning('Don\\'t suggest using CPU device. 
'\n                       'Some functions are not supported for now.')\n    model.to(device)\n    model.eval()\n    return model\n\n\ndef inference_detector(model, pcd):\n    \"\"\"Inference point cloud with the detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        pcd (str): Point cloud files.\n\n    Returns:\n        tuple: Predicted results and data from pipeline.\n    \"\"\"\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n\n    if not isinstance(pcd, str):\n        cfg = cfg.copy()\n        # set loading pipeline type\n        cfg.data.test.pipeline[0].type = 'LoadPointsFromDict'\n\n    # build the data pipeline\n    test_pipeline = deepcopy(cfg.data.test.pipeline)\n    test_pipeline = Compose(test_pipeline)\n    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)\n\n    if isinstance(pcd, str):\n        # load from point clouds file\n        data = dict(\n            pts_filename=pcd,\n            box_type_3d=box_type_3d,\n            box_mode_3d=box_mode_3d,\n            # for ScanNet demo we need axis_align_matrix\n            ann_info=dict(axis_align_matrix=np.eye(4)),\n            sweeps=[],\n            # set timestamp = 0\n            timestamp=[0],\n            img_fields=[],\n            bbox3d_fields=[],\n            pts_mask_fields=[],\n            pts_seg_fields=[],\n            bbox_fields=[],\n            mask_fields=[],\n            seg_fields=[])\n    else:\n        # load from http\n        data = dict(\n            points=pcd,\n            box_type_3d=box_type_3d,\n            box_mode_3d=box_mode_3d,\n            # for ScanNet demo we need axis_align_matrix\n            ann_info=dict(axis_align_matrix=np.eye(4)),\n            sweeps=[],\n            # set timestamp = 0\n            timestamp=[0],\n            img_fields=[],\n            bbox3d_fields=[],\n            pts_mask_fields=[],\n            pts_seg_fields=[],\n            bbox_fields=[],\n            mask_fields=[],\n            seg_fields=[])\n    data = test_pipeline(data)\n    data = collate([data], samples_per_gpu=1)\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device.index])[0]\n    else:\n        # this is a workaround to avoid the bug of MMDataParallel\n        data['img_metas'] = data['img_metas'][0].data\n        data['points'] = data['points'][0].data\n    # forward the model\n    with torch.no_grad():\n        result = model(return_loss=False, rescale=True, **data)\n    return result, data\n\n\ndef inference_multi_modality_detector(model, pcd, image, ann_file):\n    \"\"\"Inference point cloud with the multi-modality detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        pcd (str): Point cloud files.\n        image (str): Image files.\n        ann_file (str): Annotation files.\n\n    Returns:\n        tuple: Predicted results and data from pipeline.\n    \"\"\"\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n    # build the data pipeline\n    test_pipeline = deepcopy(cfg.data.test.pipeline)\n    test_pipeline = Compose(test_pipeline)\n    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)\n    # get data info containing calib\n    data_infos = mmcv.load(ann_file)\n    image_idx = int(re.findall(r'\\d+', image)[-1])  # xxx/sunrgbd_000017.jpg\n    for x in data_infos:\n        if int(x['image']['image_idx']) != image_idx:\n            continue\n        info = x\n        break\n    
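# At this point 'info' holds the annotation record whose image_idx matches the query\n    # image; its calibration matrices are used below to build lidar2img / depth2img.\n    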
data = dict(\n        pts_filename=pcd,\n        img_prefix=osp.dirname(image),\n        img_info=dict(filename=osp.basename(image)),\n        box_type_3d=box_type_3d,\n        box_mode_3d=box_mode_3d,\n        img_fields=[],\n        bbox3d_fields=[],\n        pts_mask_fields=[],\n        pts_seg_fields=[],\n        bbox_fields=[],\n        mask_fields=[],\n        seg_fields=[])\n    data = test_pipeline(data)\n\n    # TODO: this code is dataset-specific. Move lidar2img and\n    #       depth2img to .pkl annotations in the future.\n    # LiDAR to image conversion\n    if box_mode_3d == Box3DMode.LIDAR:\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P2 = info['calib']['P2'].astype(np.float32)\n        lidar2img = P2 @ rect @ Trv2c\n        data['img_metas'][0].data['lidar2img'] = lidar2img\n    # Depth to image conversion\n    elif box_mode_3d == Box3DMode.DEPTH:\n        rt_mat = info['calib']['Rt']\n        # follow Coord3DMode.convert_point\n        rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]\n                           ]) @ rt_mat.transpose(1, 0)\n        depth2img = info['calib']['K'] @ rt_mat\n        data['img_metas'][0].data['depth2img'] = depth2img\n\n    data = collate([data], samples_per_gpu=1)\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device.index])[0]\n    else:\n        # this is a workaround to avoid the bug of MMDataParallel\n        data['img_metas'] = data['img_metas'][0].data\n        data['points'] = data['points'][0].data\n        data['img'] = data['img'][0].data\n\n    # forward the model\n    with torch.no_grad():\n        result = model(return_loss=False, rescale=True, **data)\n    return result, data\n\n\ndef inference_mono_3d_detector(model, image, ann_file):\n    \"\"\"Inference image with the monocular 3D detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        image (str): Image files.\n        ann_file (str): Annotation files.\n\n    Returns:\n        tuple: Predicted results and data from pipeline.\n    \"\"\"\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n    # build the data pipeline\n    test_pipeline = deepcopy(cfg.data.test.pipeline)\n    test_pipeline = Compose(test_pipeline)\n    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)\n    # get data info containing calib\n    data_infos = mmcv.load(ann_file)\n    # find the info corresponding to this image\n    for x in data_infos['images']:\n        if osp.basename(x['file_name']) != osp.basename(image):\n            continue\n        img_info = x\n        break\n    data = dict(\n        img_prefix=osp.dirname(image),\n        img_info=dict(filename=osp.basename(image)),\n        box_type_3d=box_type_3d,\n        box_mode_3d=box_mode_3d,\n        img_fields=[],\n        bbox3d_fields=[],\n        pts_mask_fields=[],\n        pts_seg_fields=[],\n        bbox_fields=[],\n        mask_fields=[],\n        seg_fields=[])\n\n    # camera points to image conversion\n    if box_mode_3d == Box3DMode.CAM:\n        data['img_info'].update(dict(cam_intrinsic=img_info['cam_intrinsic']))\n\n    data = test_pipeline(data)\n\n    data = collate([data], samples_per_gpu=1)\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device.index])[0]\n    else:\n        # this is a workaround to avoid the bug of MMDataParallel\n        
data['img_metas'] = data['img_metas'][0].data\n        data['img'] = data['img'][0].data\n\n    # forward the model\n    with torch.no_grad():\n        result = model(return_loss=False, rescale=True, **data)\n    return result, data\n\n\ndef inference_segmentor(model, pcd):\n    \"\"\"Inference point cloud with the segmentor.\n\n    Args:\n        model (nn.Module): The loaded segmentor.\n        pcd (str): Point cloud files.\n\n    Returns:\n        tuple: Predicted results and data from pipeline.\n    \"\"\"\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n    # build the data pipeline\n    test_pipeline = deepcopy(cfg.data.test.pipeline)\n    test_pipeline = Compose(test_pipeline)\n    data = dict(\n        pts_filename=pcd,\n        img_fields=[],\n        bbox3d_fields=[],\n        pts_mask_fields=[],\n        pts_seg_fields=[],\n        bbox_fields=[],\n        mask_fields=[],\n        seg_fields=[])\n    data = test_pipeline(data)\n    data = collate([data], samples_per_gpu=1)\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device.index])[0]\n    else:\n        # this is a workaround to avoid the bug of MMDataParallel\n        data['img_metas'] = data['img_metas'][0].data\n        data['points'] = data['points'][0].data\n    # forward the model\n    with torch.no_grad():\n        result = model(return_loss=False, rescale=True, **data)\n    return result, data\n\n\ndef show_det_result_meshlab(data,\n                            result,\n                            out_dir,\n                            score_thr=0.0,\n                            show=False,\n                            snapshot=False):\n    \"\"\"Show 3D detection result by meshlab.\"\"\"\n    points = data['points'][0][0].cpu().numpy()\n    pts_filename = data['img_metas'][0][0]['pts_filename']\n    file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n    if 'pts_bbox' in result[0].keys():\n        pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()\n        pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()\n    else:\n        pred_bboxes = result[0]['boxes_3d'].tensor.numpy()\n        pred_scores = result[0]['scores_3d'].numpy()\n\n    # filter out low score bboxes for visualization\n    if score_thr > 0:\n        inds = pred_scores > score_thr\n        pred_bboxes = pred_bboxes[inds]\n\n    # for now we convert points into depth mode\n    box_mode = data['img_metas'][0][0]['box_mode_3d']\n    if box_mode != Box3DMode.DEPTH:\n        points = Coord3DMode.convert(points, box_mode, Coord3DMode.DEPTH)\n        show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)\n    else:\n        show_bboxes = deepcopy(pred_bboxes)\n\n    show_result(\n        points,\n        None,\n        show_bboxes,\n        out_dir,\n        file_name,\n        show=show,\n        snapshot=snapshot)\n\n    return file_name\n\n\ndef show_seg_result_meshlab(data,\n                            result,\n                            out_dir,\n                            palette,\n                            show=False,\n                            snapshot=False):\n    \"\"\"Show 3D segmentation result by meshlab.\"\"\"\n    points = data['points'][0][0].cpu().numpy()\n    pts_filename = data['img_metas'][0][0]['pts_filename']\n    file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n    pred_seg = result[0]['semantic_mask'].numpy()\n\n    if palette is None:\n        # generate random color map\n        max_idx = 
pred_seg.max()\n        palette = np.random.randint(0, 256, size=(max_idx + 1, 3))\n    palette = np.array(palette).astype(np.int)\n\n    show_seg_result(\n        points,\n        None,\n        pred_seg,\n        out_dir,\n        file_name,\n        palette=palette,\n        show=show,\n        snapshot=snapshot)\n\n    return file_name\n\n\ndef show_proj_det_result_meshlab(data,\n                                 result,\n                                 out_dir,\n                                 score_thr=0.0,\n                                 show=False,\n                                 snapshot=False):\n    \"\"\"Show result of projecting 3D bbox to 2D image by meshlab.\"\"\"\n    assert 'img' in data.keys(), 'image data is not provided for visualization'\n\n    img_filename = data['img_metas'][0][0]['filename']\n    file_name = osp.split(img_filename)[-1].split('.')[0]\n\n    # read from file because img in data_dict has undergone pipeline transform\n    img = mmcv.imread(img_filename)\n\n    if 'pts_bbox' in result[0].keys():\n        result[0] = result[0]['pts_bbox']\n    elif 'img_bbox' in result[0].keys():\n        result[0] = result[0]['img_bbox']\n    pred_bboxes = result[0]['boxes_3d'].tensor.numpy()\n    pred_scores = result[0]['scores_3d'].numpy()\n\n    # filter out low score bboxes for visualization\n    if score_thr > 0:\n        inds = pred_scores > score_thr\n        pred_bboxes = pred_bboxes[inds]\n\n    box_mode = data['img_metas'][0][0]['box_mode_3d']\n    if box_mode == Box3DMode.LIDAR:\n        if 'lidar2img' not in data['img_metas'][0][0]:\n            raise NotImplementedError(\n                'LiDAR to image transformation matrix is not provided')\n\n        show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))\n\n        show_multi_modality_result(\n            img,\n            None,\n            show_bboxes,\n            data['img_metas'][0][0]['lidar2img'],\n            out_dir,\n            file_name,\n            box_mode='lidar',\n            show=show)\n    elif box_mode == Box3DMode.DEPTH:\n        show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))\n\n        show_multi_modality_result(\n            img,\n            None,\n            show_bboxes,\n            None,\n            out_dir,\n            file_name,\n            box_mode='depth',\n            img_metas=data['img_metas'][0][0],\n            show=show)\n    elif box_mode == Box3DMode.CAM:\n        if 'cam2img' not in data['img_metas'][0][0]:\n            raise NotImplementedError(\n                'camera intrinsic matrix is not provided')\n\n        show_bboxes = CameraInstance3DBoxes(\n            pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))\n\n        show_multi_modality_result(\n            img,\n            None,\n            show_bboxes,\n            data['img_metas'][0][0]['cam2img'],\n            out_dir,\n            file_name,\n            box_mode='camera',\n            show=show)\n    else:\n        raise NotImplementedError(\n            f'visualization of {box_mode} bbox is not supported')\n\n    return file_name\n\n\ndef show_result_meshlab(data,\n                        result,\n                        out_dir,\n                        score_thr=0.0,\n                        show=False,\n                        snapshot=False,\n                        task='det',\n                        palette=None):\n    \"\"\"Show result by meshlab.\n\n    Args:\n        data (dict): Contain data from pipeline.\n        result 
(dict): Predicted result from model.\n        out_dir (str): Directory to save visualized result.\n        score_thr (float, optional): Minimum score of bboxes to be shown.\n            Default: 0.0\n        show (bool, optional): Visualize the results online. Defaults to False.\n        snapshot (bool, optional): Whether to save the online results.\n            Defaults to False.\n        task (str, optional): Distinguish which task result to visualize.\n            Currently we support 3D detection, multi-modality detection and\n            3D segmentation. Defaults to 'det'.\n        palette (list[list[int]] | np.ndarray, optional): The palette\n            of segmentation map. If None is given, random palette will be\n            generated. Defaults to None.\n    \"\"\"\n    assert task in ['det', 'multi_modality-det', 'seg', 'mono-det'], \\\n        f'unsupported visualization task {task}'\n    assert out_dir is not None, 'Expect out_dir, got none.'\n\n    if task in ['det', 'multi_modality-det']:\n        file_name = show_det_result_meshlab(data, result, out_dir, score_thr,\n                                            show, snapshot)\n\n    if task in ['seg']:\n        file_name = show_seg_result_meshlab(data, result, out_dir, palette,\n                                            show, snapshot)\n\n    if task in ['multi_modality-det', 'mono-det']:\n        file_name = show_proj_det_result_meshlab(data, result, out_dir,\n                                                 score_thr, show, snapshot)\n\n    return out_dir, file_name\n"
  },
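In this inference module, `inference_segmentor` runs the test pipeline on a single point cloud and `show_result_meshlab` dumps a MeshLab-compatible visualization of the prediction. A minimal usage sketch follows; it assumes mmdet3d is installed and exposes a checkpoint loader named `init_model` (as in stock mmdet3d, not shown in this excerpt), and all paths are placeholders.

```python
# Usage sketch only: paths are placeholders and `init_model` is assumed to be
# importable from mmdet3d.apis (it is not part of the excerpt above).
from mmdet3d.apis import inference_segmentor, init_model, show_result_meshlab

config_file = 'configs/pointnet2/pointnet2_ssg_scannet_seg.py'  # placeholder
checkpoint_file = 'ckpts/pointnet2_ssg_scannet_seg.pth'         # placeholder
pcd_file = 'demo/data/scannet/scene0000_00.bin'                 # placeholder

model = init_model(config_file, checkpoint_file, device='cuda:0')

# Run the test pipeline on one point cloud; the returned `data` dict is what
# the visualization helpers expect alongside the prediction.
result, data = inference_segmentor(model, pcd_file)

# Write points + predicted labels in a MeshLab-friendly format under out_dir.
out_dir, file_name = show_result_meshlab(data, result, out_dir='demo_out', task='seg')
```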
  {
    "path": "mmdet3d/apis/test.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nimport mmcv\nimport torch\nfrom mmcv.image import tensor2imgs\nimport time\nfrom mmdet3d.models import (Base3DDetector, Base3DSegmentor,\n                            SingleStageMono3DDetector)\n\n\ndef single_gpu_test(model,\n                    data_loader,\n                    show=False,\n                    out_dir=None,\n                    show_score_thr=0.3):\n    \"\"\"Test model with single gpu.\n\n    This method tests model with single gpu and gives the 'show' option.\n    By setting ``show=True``, it saves the visualization results under\n    ``out_dir``.\n\n    Args:\n        model (nn.Module): Model to be tested.\n        data_loader (nn.Dataloader): Pytorch data loader.\n        show (bool, optional): Whether to save viualization results.\n            Default: True.\n        out_dir (str, optional): The path to save visualization results.\n            Default: None.\n\n    Returns:\n        list[dict]: The prediction results.\n    \"\"\"\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n\n        if show:\n            # Visualize the results of MMDetection3D model\n            # 'show_results' is MMdetection3D visualization API\n            models_3d = (Base3DDetector, Base3DSegmentor,\n                         SingleStageMono3DDetector)\n            if isinstance(model.module, models_3d):\n                model.module.show_results(\n                    data,\n                    result,\n                    out_dir=out_dir,\n                    show=show,\n                    score_thr=show_score_thr)\n            # Visualize the results of MMDetection model\n            # 'show_result' is MMdetection visualization API\n            else:\n                batch_size = len(result)\n                if batch_size == 1 and isinstance(data['img'][0],\n                                                  torch.Tensor):\n                    img_tensor = data['img'][0]\n                else:\n                    img_tensor = data['img'][0].data[0]\n                img_metas = data['img_metas'][0].data[0]\n                imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])\n                assert len(imgs) == len(img_metas)\n\n                for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):\n                    h, w, _ = img_meta['img_shape']\n                    img_show = img[:h, :w, :]\n\n                    ori_h, ori_w = img_meta['ori_shape'][:-1]\n                    img_show = mmcv.imresize(img_show, (ori_w, ori_h))\n\n                    if out_dir:\n                        out_file = osp.join(out_dir, img_meta['ori_filename'])\n                    else:\n                        out_file = None\n\n                    model.module.show_result(\n                        img_show,\n                        result[i],\n                        show=show,\n                        out_file=out_file,\n                        score_thr=show_score_thr)\n        results.extend(result)\n\n        batch_size = len(result)\n        for _ in range(batch_size):\n            prog_bar.update()\n    return results\n\n\n# ---------------------------------------------\n# Copyright (c) OpenMMLab. 
All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\nimport os.path as osp\nimport pickle\nimport shutil\nimport tempfile\nimport time\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv.image import tensor2imgs\nfrom mmcv.runner import get_dist_info\n\nfrom mmdet.core import encode_mask_results\n\n\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\n\ndef custom_encode_mask_results(mask_results):\n    \"\"\"Encode bitmap mask to RLE code. Semantic Masks only\n    Args:\n        mask_results (list | tuple[list]): bitmap mask results.\n            In mask scoring rcnn, mask_results is a tuple of (segm_results,\n            segm_cls_score).\n    Returns:\n        list | tuple: RLE encoded mask.\n    \"\"\"\n    cls_segms = mask_results\n    num_classes = len(cls_segms)\n    encoded_mask_results = []\n    for i in range(len(cls_segms)):\n        encoded_mask_results.append(\n            mask_util.encode(\n                np.array(\n                    cls_segms[i][:, :, np.newaxis], order='F',\n                        dtype='uint8'))[0])  # encoded with RLE\n    return [encoded_mask_results]\n\ndef custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):\n    \"\"\"Test model with multiple gpus.\n    This method tests model with multiple gpus and collects the results\n    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'\n    it encodes results to gpu tensors and use gpu communication for results\n    collection. On cpu mode it saves the results on different gpus to 'tmpdir'\n    and collects them by the rank 0 worker.\n    Args:\n        model (nn.Module): Model to be tested.\n        data_loader (nn.Dataloader): Pytorch data loader.\n        tmpdir (str): Path of directory to save the temporary results from\n            different gpus under cpu mode.\n        gpu_collect (bool): Option to use either gpu or cpu to collect results.\n    Returns:\n        list: The prediction results.\n    \"\"\"\n    model.eval()\n    bbox_results = []\n    mask_results = []\n    dataset = data_loader.dataset\n    rank, world_size = get_dist_info()\n    if rank == 0:\n        prog_bar = mmcv.ProgressBar(len(dataset))\n    time.sleep(2)  # This line can prevent deadlock problem in some cases.\n    have_mask = False\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n            # encode mask results\n            if isinstance(result, dict):\n                if 'bbox_results' in result.keys():\n                    bbox_result = result['bbox_results']\n                    batch_size = len(result['bbox_results'])\n                    bbox_results.extend(bbox_result)\n                if 'mask_results' in result.keys() and result['mask_results'] is not None:\n                    mask_result = custom_encode_mask_results(result['mask_results'])\n                    mask_results.extend(mask_result)\n                    have_mask = True\n            else:\n                batch_size = len(result)\n                bbox_results.extend(result)\n\n            #if isinstance(result[0], tuple):\n            #    assert False, 'this code is for instance segmentation, which our code will not utilize.'\n            #    result = [(bbox_results, encode_mask_results(mask_results))\n            #              for bbox_results, mask_results in result]\n        if 
rank == 0:\n            \n            for _ in range(batch_size * world_size):\n                prog_bar.update()\n\n    # collect results from all ranks\n    if gpu_collect:\n        bbox_results = collect_results_gpu(bbox_results, len(dataset))\n        if have_mask:\n            mask_results = collect_results_gpu(mask_results, len(dataset))\n        else:\n            mask_results = None\n    else:\n        bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)\n        tmpdir = tmpdir+'_mask' if tmpdir is not None else None\n        if have_mask:\n            mask_results = collect_results_cpu(mask_results, len(dataset), tmpdir)\n        else:\n            mask_results = None\n\n    if mask_results is None:\n        return bbox_results\n    return {'bbox_results': bbox_results, 'mask_results': mask_results}\n\n\n\n\ndef collect_results_cpu(result_part, size, tmpdir=None):\n    rank, world_size = get_dist_info()\n    # create a tmp dir if it is not specified\n    tmpdir = None\n    if tmpdir is None:\n        MAX_LEN = 512\n        # 32 is whitespace\n        dir_tensor = torch.full((MAX_LEN, ),\n                                32,\n                                dtype=torch.uint8,\n                                device='cuda')\n        if rank == 0:\n            mmcv.mkdir_or_exist('.dist_test')\n            prefix = str(time.time())[-5:]\n            tmpdir = tempfile.mkdtemp(dir='.dist_test', prefix=prefix)\n            tmpdir = torch.tensor(\n                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')\n            dir_tensor[:len(tmpdir)] = tmpdir\n        dist.broadcast(dir_tensor, 0)\n        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()\n    else:\n        mmcv.mkdir_or_exist(tmpdir)\n    # dump the part result to the dir\n    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))\n    dist.barrier()\n    # collect all parts\n    if rank != 0:\n        return None\n    else:\n        # load results of all parts from tmp dir\n        part_list = []\n        for i in range(world_size):\n            part_file = osp.join(tmpdir, f'part_{i}.pkl')\n            part_list.append(mmcv.load(part_file))\n        # sort the results\n        ordered_results = []\n        '''\n        bacause we change the sample of the evaluation stage to make sure that each gpu will handle continuous sample,\n        '''\n        #for res in zip(*part_list):\n        for res in part_list:  \n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results #[:size]\n        # remove tmp dir\n        shutil.rmtree(tmpdir)\n        return ordered_results\n\n\ndef collect_results_gpu(result_part, size):\n    collect_results_cpu(result_part, size)"
  },
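`collect_results_cpu` above gathers per-rank predictions through a shared temporary directory: each rank pickles its chunk as `part_{rank}.pkl` (note the passed-in `tmpdir` is overridden with a fresh `.dist_test` directory), a `dist.barrier()` synchronizes the writes, and rank 0 reloads the chunks and concatenates them in rank order; the original interleaving via `zip(*part_list)` is disabled because the modified evaluation sampler hands each GPU a contiguous block of samples. The stand-alone sketch below mirrors that file-based gather without `torch.distributed`; the function names are illustrative and not part of the repository.

```python
# Illustrative sketch of the rank-ordered, file-based result gather.
# Simplified stand-in: real usage relies on torch.distributed barriers and
# a filesystem shared by all ranks.
import os
import pickle
import tempfile


def dump_part(result_part, tmpdir, rank):
    # Each rank writes its own chunk of predictions.
    with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f:
        pickle.dump(result_part, f)


def gather_parts(tmpdir, world_size):
    # Rank 0 loads the chunks and concatenates them in rank order, matching a
    # sampler that assigns each GPU a contiguous slice of the dataset.
    ordered = []
    for rank in range(world_size):
        with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'rb') as f:
            ordered.extend(pickle.load(f))
    return ordered


if __name__ == '__main__':
    tmpdir = tempfile.mkdtemp()
    dump_part(['sample_0', 'sample_1'], tmpdir, rank=0)
    dump_part(['sample_2', 'sample_3'], tmpdir, rank=1)
    print(gather_parts(tmpdir, world_size=2))  # ['sample_0', ..., 'sample_3']
```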
  {
    "path": "mmdet3d/apis/train.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport random\nimport warnings\n\nimport numpy as np\nimport torch\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,\n                         Fp16OptimizerHook, OptimizerHook, build_optimizer,\n                         build_runner, get_dist_info)\nfrom mmcv.utils import build_from_cfg\nfrom torch import distributed as dist\n\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet3d.utils import find_latest_checkpoint\nfrom mmdet.core import DistEvalHook as MMDET_DistEvalHook\nfrom mmdet.core import EvalHook as MMDET_EvalHook\nfrom mmdet3d.datasets import build_dataloader as build_mmdet_dataloader\nfrom mmdet.datasets import replace_ImageToTensor\nfrom mmdet.utils import get_root_logger as get_mmdet_root_logger\nfrom mmseg.core import DistEvalHook as MMSEG_DistEvalHook\nfrom mmseg.core import EvalHook as MMSEG_EvalHook\nfrom mmseg.datasets import build_dataloader as build_mmseg_dataloader\nfrom mmseg.utils import get_root_logger as get_mmseg_root_logger\nimport time\nfrom mmdet3d.models.fbbev.utils import CustomDistEvalHook\nimport os.path as osp\n\ndef init_random_seed(seed=None, device='cuda'):\n    \"\"\"Initialize random seed.\n\n    If the seed is not set, the seed will be automatically randomized,\n    and then broadcast to all processes to prevent some potential bugs.\n    Args:\n        seed (int, optional): The seed. Default to None.\n        device (str, optional): The device where the seed will be put on.\n            Default to 'cuda'.\n    Returns:\n        int: Seed to be used.\n    \"\"\"\n    if seed is not None:\n        return seed\n\n    # Make sure all ranks share the same random seed to prevent\n    # some potential bugs. 
Please refer to\n    # https://github.com/open-mmlab/mmdetection/issues/6339\n    rank, world_size = get_dist_info()\n    seed = np.random.randint(2**31)\n    if world_size == 1:\n        return seed\n\n    if rank == 0:\n        random_num = torch.tensor(seed, dtype=torch.int32, device=device)\n    else:\n        random_num = torch.tensor(0, dtype=torch.int32, device=device)\n    dist.broadcast(random_num, src=0)\n    return random_num.item()\n\n\ndef set_random_seed(seed, deterministic=False):\n    \"\"\"Set random seed.\n\n    Args:\n        seed (int): Seed to be used.\n        deterministic (bool): Whether to set the deterministic option for\n            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`\n            to True and `torch.backends.cudnn.benchmark` to False.\n            Default: False.\n    \"\"\"\n    random.seed(seed)\n    np.random.seed(seed)\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    if deterministic:\n        torch.backends.cudnn.deterministic = True\n        torch.backends.cudnn.benchmark = False\n\n\ndef train_segmentor(model,\n                    dataset,\n                    cfg,\n                    distributed=False,\n                    validate=False,\n                    timestamp=None,\n                    meta=None):\n    \"\"\"Launch segmentor training.\"\"\"\n    logger = get_mmseg_root_logger(cfg.log_level)\n\n    # prepare data loaders\n    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]\n    data_loaders = [\n        build_mmseg_dataloader(\n            ds,\n            cfg.data.samples_per_gpu,\n            cfg.data.workers_per_gpu,\n            # cfg.gpus will be ignored if distributed\n            len(cfg.gpu_ids),\n            dist=distributed,\n            seed=cfg.seed,\n            drop_last=True) for ds in dataset\n    ]\n\n    # put model on gpus\n    if distributed:\n        find_unused_parameters = cfg.get('find_unused_parameters', False)\n        # Sets the `find_unused_parameters` parameter in\n        # torch.nn.parallel.DistributedDataParallel\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False,\n            find_unused_parameters=find_unused_parameters)\n    else:\n        model = MMDataParallel(\n            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)\n\n    # build runner\n    optimizer = build_optimizer(model, cfg.optimizer)\n\n    if cfg.get('runner') is None:\n        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}\n        warnings.warn(\n            'config is now expected to have a `runner` section, '\n            'please set `runner` in your config.', UserWarning)\n\n    runner = build_runner(\n        cfg.runner,\n        default_args=dict(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=cfg.work_dir,\n            logger=logger,\n            meta=meta))\n\n    # register hooks\n    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,\n                                   cfg.checkpoint_config, cfg.log_config,\n                                   cfg.get('momentum_config', None))\n\n    # an ugly walkaround to make the .log and .log.json filenames the same\n    runner.timestamp = timestamp\n\n    # register eval hooks\n    if validate:\n        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))\n        val_dataloader = 
build_mmseg_dataloader(\n            val_dataset,\n            samples_per_gpu=1,\n            workers_per_gpu=cfg.data.workers_per_gpu,\n            dist=distributed,\n            shuffle=False)\n        eval_cfg = cfg.get('evaluation', {})\n        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'\n        eval_hook = MMSEG_DistEvalHook if distributed else MMSEG_EvalHook\n        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the\n        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.\n\n        runner.register_hook(\n            eval_hook(val_dataloader, **eval_cfg), priority='LOW')\n\n    # user-defined hooks\n    if cfg.get('custom_hooks', None):\n        custom_hooks = cfg.custom_hooks\n        assert isinstance(custom_hooks, list), \\\n            f'custom_hooks expect list type, but got {type(custom_hooks)}'\n        for hook_cfg in cfg.custom_hooks:\n            assert isinstance(hook_cfg, dict), \\\n                'Each item in custom_hooks expects dict type, but got ' \\\n                f'{type(hook_cfg)}'\n            hook_cfg = hook_cfg.copy()\n            priority = hook_cfg.pop('priority', 'NORMAL')\n            hook = build_from_cfg(hook_cfg, HOOKS)\n            runner.register_hook(hook, priority=priority)\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow)\n\n\ndef train_detector(model,\n                   dataset,\n                   cfg,\n                   distributed=False,\n                   validate=False,\n                   timestamp=None,\n                   meta=None):\n    logger = get_mmdet_root_logger(log_level=cfg.log_level)\n\n    # prepare data loaders\n    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]\n    if 'imgs_per_gpu' in cfg.data:\n        logger.warning('\"imgs_per_gpu\" is deprecated in MMDet V2.0. 
'\n                       'Please use \"samples_per_gpu\" instead')\n        if 'samples_per_gpu' in cfg.data:\n            logger.warning(\n                f'Got \"imgs_per_gpu\"={cfg.data.imgs_per_gpu} and '\n                f'\"samples_per_gpu\"={cfg.data.samples_per_gpu}, \"imgs_per_gpu\"'\n                f'={cfg.data.imgs_per_gpu} is used in this experiments')\n        else:\n            logger.warning(\n                'Automatically set \"samples_per_gpu\"=\"imgs_per_gpu\"='\n                f'{cfg.data.imgs_per_gpu} in this experiments')\n        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu\n\n    runner_type = 'EpochBasedRunner' if 'runner' not in cfg else cfg.runner[\n        'type']\n    data_loaders = [\n        build_mmdet_dataloader(\n            ds,\n            cfg.data.samples_per_gpu,\n            cfg.data.workers_per_gpu,\n            # `num_gpus` will be ignored if distributed\n            num_gpus=len(cfg.gpu_ids),\n            dist=distributed,\n            seed=cfg.seed,\n            runner_type=runner_type,\n            persistent_workers=cfg.data.get('persistent_workers', False))\n        for ds in dataset\n    ]\n\n    # put model on gpus\n    if distributed:\n        find_unused_parameters = cfg.get('find_unused_parameters', False)\n        # Sets the `find_unused_parameters` parameter in\n        # torch.nn.parallel.DistributedDataParallel\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False,\n            find_unused_parameters=find_unused_parameters)\n    else:\n        model = MMDataParallel(\n            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)\n\n    # build runner\n    optimizer = build_optimizer(model, cfg.optimizer)\n\n    if 'runner' not in cfg:\n        cfg.runner = {\n            'type': 'EpochBasedRunner',\n            'max_epochs': cfg.total_epochs\n        }\n        warnings.warn(\n            'config is now expected to have a `runner` section, '\n            'please set `runner` in your config.', UserWarning)\n    else:\n        if 'total_epochs' in cfg:\n            assert cfg.total_epochs == cfg.runner.max_epochs\n\n    runner = build_runner(\n        cfg.runner,\n        default_args=dict(\n            model=model,\n            optimizer=optimizer,\n            work_dir=cfg.work_dir,\n            logger=logger,\n            meta=meta))\n\n    # an ugly workaround to make .log and .log.json filenames the same\n    runner.timestamp = timestamp\n\n    # fp16 setting\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        optimizer_config = Fp16OptimizerHook(\n            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)\n    elif distributed and 'type' not in cfg.optimizer_config:\n        optimizer_config = OptimizerHook(**cfg.optimizer_config)\n    else:\n        optimizer_config = cfg.optimizer_config\n\n    # register hooks\n    runner.register_training_hooks(\n        cfg.lr_config,\n        optimizer_config,\n        cfg.checkpoint_config,\n        cfg.log_config,\n        cfg.get('momentum_config', None),\n        custom_hooks_config=cfg.get('custom_hooks', None))\n\n    if distributed:\n        if isinstance(runner, EpochBasedRunner):\n            runner.register_hook(DistSamplerSeedHook())\n\n    # register eval hooks\n    if validate:\n        # Support batch_size > 1 in validation\n        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)\n        if val_samples_per_gpu 
> 1:\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.val.pipeline = replace_ImageToTensor(\n                cfg.data.val.pipeline)\n        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))\n        val_dataloader = build_mmdet_dataloader(\n            val_dataset,\n            samples_per_gpu=val_samples_per_gpu,\n            workers_per_gpu=cfg.data.workers_per_gpu,\n            dist=distributed,\n            val=True,\n            runner_type=cfg.data.test_dataloader.get('runner_type', 'EpochBasedRunner'),\n            shuffle=False)\n        eval_cfg = cfg.get('evaluation', {})\n        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'\n        eval_hook = MMDET_DistEvalHook if distributed else MMDET_EvalHook\n        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the\n        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.\n        # eval_cfg['jsonfile_prefix'] = osp.join('val', cfg.work_dir, time.ctime().replace(' ','_').replace(':','_'))\n        if cfg.get('use_custom_eval_hook', False):\n            eval_hook = CustomDistEvalHook if distributed else eval_hook\n        runner.register_hook(\n            eval_hook(val_dataloader, work_dir=cfg.work_dir, **eval_cfg), priority='LOW')\n\n    resume_from = None\n    if cfg.resume_from is None and cfg.get('auto_resume'):\n        resume_from = find_latest_checkpoint(cfg.work_dir)\n\n    if resume_from is not None:\n        cfg.resume_from = resume_from\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow)\n\n\ndef train_model(model,\n                dataset,\n                cfg,\n                distributed=False,\n                validate=False,\n                timestamp=None,\n                meta=None):\n    \"\"\"A function wrapper for launching model training according to cfg.\n\n    Because we need different eval_hook in runner. Should be deprecated in the\n    future.\n    \"\"\"\n    if cfg.model.type in ['EncoderDecoder3D']:\n        train_segmentor(\n            model,\n            dataset,\n            cfg,\n            distributed=distributed,\n            validate=validate,\n            timestamp=timestamp,\n            meta=meta)\n    else:\n        train_detector(\n            model,\n            dataset,\n            cfg,\n            distributed=distributed,\n            validate=validate,\n            timestamp=timestamp,\n            meta=meta)\n\n"
  },
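`train_detector` above is driven almost entirely by the config object: dataloader sizes, runner type, optimizer and optional fp16 hooks, LR/checkpoint/log hooks, the (optionally custom) evaluation hook, and resume/load behaviour. The sketch below lists the fields the function actually reads; the concrete values are placeholders, not a config shipped with this repository.

```python
# Hedged sketch of the config fields consumed by `train_detector`.
# Keys mirror the attribute accesses in the function body; values are placeholders.
cfg_sketch = dict(
    log_level='INFO',
    gpu_ids=[0],
    seed=0,
    work_dir='work_dirs/example',
    workflow=[('train', 1)],
    data=dict(
        samples_per_gpu=2,
        workers_per_gpu=4,
        persistent_workers=False,
        val=dict(samples_per_gpu=1),                 # popped for the val dataloader
        test_dataloader=dict(runner_type='EpochBasedRunner')),
    optimizer=dict(type='AdamW', lr=2e-4, weight_decay=0.01),
    optimizer_config=dict(grad_clip=dict(max_norm=35, norm_type=2)),
    # fp16=dict(loss_scale=512.),                    # uncomment to switch to Fp16OptimizerHook
    runner=dict(type='EpochBasedRunner', max_epochs=24),
    lr_config=dict(policy='step', step=[20, 23]),
    checkpoint_config=dict(interval=1),
    log_config=dict(interval=50),
    evaluation=dict(interval=1),                     # forwarded to the eval hook as **eval_cfg
    use_custom_eval_hook=False,                      # CustomDistEvalHook when True and distributed
    find_unused_parameters=False,
    custom_hooks=None,
    auto_resume=False,
    resume_from=None,
    load_from=None,
)
```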
  {
    "path": "mmdet3d/core/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .anchor import *  # noqa: F401, F403\nfrom .bbox import *  # noqa: F401, F403\nfrom .evaluation import *  # noqa: F401, F403\nfrom .hook import *  # noqa: F401, F403\nfrom .points import *  # noqa: F401, F403\nfrom .post_processing import *  # noqa: F401, F403\nfrom .utils import *  # noqa: F401, F403\nfrom .visualizer import *  # noqa: F401, F403\nfrom .voxel import *  # noqa: F401, F403\n"
  },
  {
    "path": "mmdet3d/core/anchor/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.core.anchor import build_prior_generator\nfrom .anchor_3d_generator import (AlignedAnchor3DRangeGenerator,\n                                  AlignedAnchor3DRangeGeneratorPerCls,\n                                  Anchor3DRangeGenerator)\n\n__all__ = [\n    'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator',\n    'build_prior_generator', 'AlignedAnchor3DRangeGeneratorPerCls'\n]\n"
  },
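Because the generators in the next file register themselves in mmdet's anchor registry, configs normally refer to them by their `type` string and construct them through the `build_prior_generator` re-exported here. A small sketch, with illustrative range/size values rather than ones taken from this repository's configs:

```python
# Sketch: building a registered 3D anchor generator from a config dict.
# The numeric values are illustrative placeholders.
from mmdet3d.core.anchor import build_prior_generator

anchor_generator_cfg = dict(
    type='AlignedAnchor3DRangeGenerator',
    ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
    sizes=[[3.9, 1.6, 1.56]],
    rotations=[0, 1.5707963])

anchor_generator = build_prior_generator(anchor_generator_cfg)
print(anchor_generator.num_levels, anchor_generator.num_base_anchors)  # 1 2
```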
  {
    "path": "mmdet3d/core/anchor/anchor_3d_generator.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.core.anchor import ANCHOR_GENERATORS\n\n\n@ANCHOR_GENERATORS.register_module()\nclass Anchor3DRangeGenerator(object):\n    \"\"\"3D Anchor Generator by range.\n\n    This anchor generator generates anchors by the given range in different\n    feature levels.\n    Due the convention in 3D detection, different anchor sizes are related to\n    different ranges for different categories. However we find this setting\n    does not effect the performance much in some datasets, e.g., nuScenes.\n\n    Args:\n        ranges (list[list[float]]): Ranges of different anchors.\n            The ranges are the same across different feature levels. But may\n            vary for different anchor sizes if size_per_range is True.\n        sizes (list[list[float]], optional): 3D sizes of anchors.\n            Defaults to [[3.9, 1.6, 1.56]].\n        scales (list[int], optional): Scales of anchors in different feature\n            levels. Defaults to [1].\n        rotations (list[float], optional): Rotations of anchors in a feature\n            grid. Defaults to [0, 1.5707963].\n        custom_values (tuple[float], optional): Customized values of that\n            anchor. For example, in nuScenes the anchors have velocities.\n            Defaults to ().\n        reshape_out (bool, optional): Whether to reshape the output into\n            (N x 4). Defaults to True.\n        size_per_range (bool, optional): Whether to use separate ranges for\n            different sizes. If size_per_range is True, the ranges should have\n            the same length as the sizes, if not, it will be duplicated.\n            Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 ranges,\n                 sizes=[[3.9, 1.6, 1.56]],\n                 scales=[1],\n                 rotations=[0, 1.5707963],\n                 custom_values=(),\n                 reshape_out=True,\n                 size_per_range=True):\n        assert mmcv.is_list_of(ranges, list)\n        if size_per_range:\n            if len(sizes) != len(ranges):\n                assert len(ranges) == 1\n                ranges = ranges * len(sizes)\n            assert len(ranges) == len(sizes)\n        else:\n            assert len(ranges) == 1\n        assert mmcv.is_list_of(sizes, list)\n        assert isinstance(scales, list)\n\n        self.sizes = sizes\n        self.scales = scales\n        self.ranges = ranges\n        self.rotations = rotations\n        self.custom_values = custom_values\n        self.cached_anchors = None\n        self.reshape_out = reshape_out\n        self.size_per_range = size_per_range\n\n    def __repr__(self):\n        s = self.__class__.__name__ + '('\n        s += f'anchor_range={self.ranges},\\n'\n        s += f'scales={self.scales},\\n'\n        s += f'sizes={self.sizes},\\n'\n        s += f'rotations={self.rotations},\\n'\n        s += f'reshape_out={self.reshape_out},\\n'\n        s += f'size_per_range={self.size_per_range})'\n        return s\n\n    @property\n    def num_base_anchors(self):\n        \"\"\"list[int]: Total number of base anchors in a feature grid.\"\"\"\n        num_rot = len(self.rotations)\n        num_size = torch.tensor(self.sizes).reshape(-1, 3).size(0)\n        return num_rot * num_size\n\n    @property\n    def num_levels(self):\n        \"\"\"int: Number of feature levels that the generator is applied to.\"\"\"\n        return len(self.scales)\n\n    def grid_anchors(self, 
featmap_sizes, device='cuda'):\n        \"\"\"Generate grid anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes in\n                multiple feature levels.\n            device (str, optional): Device where the anchors will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            list[torch.Tensor]: Anchors in multiple feature levels.\n                The sizes of each tensor should be [N, 4], where\n                N = width * height * num_base_anchors, width and height\n                are the sizes of the corresponding feature level,\n                num_base_anchors is the number of anchors for that level.\n        \"\"\"\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_anchors = []\n        for i in range(self.num_levels):\n            anchors = self.single_level_grid_anchors(\n                featmap_sizes[i], self.scales[i], device=device)\n            if self.reshape_out:\n                anchors = anchors.reshape(-1, anchors.size(-1))\n            multi_level_anchors.append(anchors)\n        return multi_level_anchors\n\n    def single_level_grid_anchors(self, featmap_size, scale, device='cuda'):\n        \"\"\"Generate grid anchors of a single level feature map.\n\n        This function is usually called by method ``self.grid_anchors``.\n\n        Args:\n            featmap_size (tuple[int]): Size of the feature map.\n            scale (float): Scale factor of the anchors in the current level.\n            device (str, optional): Device the tensor will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors in the overall feature map.\n        \"\"\"\n        # We reimplement the anchor generator using torch in cuda\n        # torch: 0.6975 s for 1000 times\n        # numpy: 4.3345 s for 1000 times\n        # which is ~5 times faster than the numpy implementation\n        if not self.size_per_range:\n            return self.anchors_single_range(\n                featmap_size,\n                self.ranges[0],\n                scale,\n                self.sizes,\n                self.rotations,\n                device=device)\n\n        mr_anchors = []\n        for anchor_range, anchor_size in zip(self.ranges, self.sizes):\n            mr_anchors.append(\n                self.anchors_single_range(\n                    featmap_size,\n                    anchor_range,\n                    scale,\n                    anchor_size,\n                    self.rotations,\n                    device=device))\n        mr_anchors = torch.cat(mr_anchors, dim=-3)\n        return mr_anchors\n\n    def anchors_single_range(self,\n                             feature_size,\n                             anchor_range,\n                             scale=1,\n                             sizes=[[3.9, 1.6, 1.56]],\n                             rotations=[0, 1.5707963],\n                             device='cuda'):\n        \"\"\"Generate anchors in a single range.\n\n        Args:\n            feature_size (list[float] | tuple[float]): Feature map size. It is\n                either a list of a tuple of [D, H, W](in order of z, y, and x).\n            anchor_range (torch.Tensor | list[float]): Range of anchors with\n                shape [6]. 
The order is consistent with that of anchors, i.e.,\n                (x_min, y_min, z_min, x_max, y_max, z_max).\n            scale (float | int, optional): The scale factor of anchors.\n                Defaults to 1.\n            sizes (list[list] | np.ndarray | torch.Tensor, optional):\n                Anchor size with shape [N, 3], in order of x, y, z.\n                Defaults to [[3.9, 1.6, 1.56]].\n            rotations (list[float] | np.ndarray | torch.Tensor, optional):\n                Rotations of anchors in a single feature grid.\n                Defaults to [0, 1.5707963].\n            device (str): Devices that the anchors will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors with shape\n                [*feature_size, num_sizes, num_rots, 7].\n        \"\"\"\n        if len(feature_size) == 2:\n            feature_size = [1, feature_size[0], feature_size[1]]\n        anchor_range = torch.tensor(anchor_range, device=device)\n        z_centers = torch.linspace(\n            anchor_range[2], anchor_range[5], feature_size[0], device=device)\n        y_centers = torch.linspace(\n            anchor_range[1], anchor_range[4], feature_size[1], device=device)\n        x_centers = torch.linspace(\n            anchor_range[0], anchor_range[3], feature_size[2], device=device)\n        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale\n        rotations = torch.tensor(rotations, device=device)\n\n        # torch.meshgrid default behavior is 'id', np's default is 'xy'\n        rets = torch.meshgrid(x_centers, y_centers, z_centers, rotations)\n        # torch.meshgrid returns a tuple rather than list\n        rets = list(rets)\n        tile_shape = [1] * 5\n        tile_shape[-2] = int(sizes.shape[0])\n        for i in range(len(rets)):\n            rets[i] = rets[i].unsqueeze(-2).repeat(tile_shape).unsqueeze(-1)\n\n        sizes = sizes.reshape([1, 1, 1, -1, 1, 3])\n        tile_size_shape = list(rets[0].shape)\n        tile_size_shape[3] = 1\n        sizes = sizes.repeat(tile_size_shape)\n        rets.insert(3, sizes)\n\n        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])\n        # [1, 200, 176, N, 2, 7] for kitti after permute\n\n        if len(self.custom_values) > 0:\n            custom_ndim = len(self.custom_values)\n            custom = ret.new_zeros([*ret.shape[:-1], custom_ndim])\n            # custom[:] = self.custom_values\n            ret = torch.cat([ret, custom], dim=-1)\n            # [1, 200, 176, N, 2, 9] for nus dataset after permute\n        return ret\n\n\n@ANCHOR_GENERATORS.register_module()\nclass AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):\n    \"\"\"Aligned 3D Anchor Generator by range.\n\n    This anchor generator uses a different manner to generate the positions\n    of anchors' centers from :class:`Anchor3DRangeGenerator`.\n\n    Note:\n        The `align` means that the anchor's center is aligned with the voxel\n        grid, which is also the feature grid. The previous implementation of\n        :class:`Anchor3DRangeGenerator` does not generate the anchors' center\n        according to the voxel grid. 
Rather, it generates the center by\n        uniformly distributing the anchors inside the minimum and maximum\n        anchor ranges according to the feature map sizes.\n        However, this makes the anchors center does not match the feature grid.\n        The :class:`AlignedAnchor3DRangeGenerator` add + 1 when using the\n        feature map sizes to obtain the corners of the voxel grid. Then it\n        shifts the coordinates to the center of voxel grid and use the left\n        up corner to distribute anchors.\n\n    Args:\n        anchor_corner (bool, optional): Whether to align with the corner of the\n            voxel grid. By default it is False and the anchor's center will be\n            the same as the corresponding voxel's center, which is also the\n            center of the corresponding greature grid. Defaults to False.\n    \"\"\"\n\n    def __init__(self, align_corner=False, **kwargs):\n        super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)\n        self.align_corner = align_corner\n\n    def anchors_single_range(self,\n                             feature_size,\n                             anchor_range,\n                             scale,\n                             sizes=[[3.9, 1.6, 1.56]],\n                             rotations=[0, 1.5707963],\n                             device='cuda'):\n        \"\"\"Generate anchors in a single range.\n\n        Args:\n            feature_size (list[float] | tuple[float]): Feature map size. It is\n                either a list of a tuple of [D, H, W](in order of z, y, and x).\n            anchor_range (torch.Tensor | list[float]): Range of anchors with\n                shape [6]. The order is consistent with that of anchors, i.e.,\n                (x_min, y_min, z_min, x_max, y_max, z_max).\n            scale (float | int): The scale factor of anchors.\n            sizes (list[list] | np.ndarray | torch.Tensor, optional):\n                Anchor size with shape [N, 3], in order of x, y, z.\n                Defaults to [[3.9, 1.6, 1.56]].\n            rotations (list[float] | np.ndarray | torch.Tensor, optional):\n                Rotations of anchors in a single feature grid.\n                Defaults to [0, 1.5707963].\n            device (str, optional): Devices that the anchors will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors with shape\n                [*feature_size, num_sizes, num_rots, 7].\n        \"\"\"\n        if len(feature_size) == 2:\n            feature_size = [1, feature_size[0], feature_size[1]]\n        anchor_range = torch.tensor(anchor_range, device=device)\n        z_centers = torch.linspace(\n            anchor_range[2],\n            anchor_range[5],\n            feature_size[0] + 1,\n            device=device)\n        y_centers = torch.linspace(\n            anchor_range[1],\n            anchor_range[4],\n            feature_size[1] + 1,\n            device=device)\n        x_centers = torch.linspace(\n            anchor_range[0],\n            anchor_range[3],\n            feature_size[2] + 1,\n            device=device)\n        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale\n        rotations = torch.tensor(rotations, device=device)\n\n        # shift the anchor center\n        if not self.align_corner:\n            z_shift = (z_centers[1] - z_centers[0]) / 2\n            y_shift = (y_centers[1] - y_centers[0]) / 2\n            x_shift = (x_centers[1] - x_centers[0]) / 2\n            z_centers += z_shift\n        
    y_centers += y_shift\n            x_centers += x_shift\n\n        # torch.meshgrid default behavior is 'id', np's default is 'xy'\n        rets = torch.meshgrid(x_centers[:feature_size[2]],\n                              y_centers[:feature_size[1]],\n                              z_centers[:feature_size[0]], rotations)\n\n        # torch.meshgrid returns a tuple rather than list\n        rets = list(rets)\n        tile_shape = [1] * 5\n        tile_shape[-2] = int(sizes.shape[0])\n        for i in range(len(rets)):\n            rets[i] = rets[i].unsqueeze(-2).repeat(tile_shape).unsqueeze(-1)\n\n        sizes = sizes.reshape([1, 1, 1, -1, 1, 3])\n        tile_size_shape = list(rets[0].shape)\n        tile_size_shape[3] = 1\n        sizes = sizes.repeat(tile_size_shape)\n        rets.insert(3, sizes)\n\n        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])\n\n        if len(self.custom_values) > 0:\n            custom_ndim = len(self.custom_values)\n            custom = ret.new_zeros([*ret.shape[:-1], custom_ndim])\n            # TODO: check the support of custom values\n            # custom[:] = self.custom_values\n            ret = torch.cat([ret, custom], dim=-1)\n        return ret\n\n\n@ANCHOR_GENERATORS.register_module()\nclass AlignedAnchor3DRangeGeneratorPerCls(AlignedAnchor3DRangeGenerator):\n    \"\"\"3D Anchor Generator by range for per class.\n\n    This anchor generator generates anchors by the given range for per class.\n    Note that feature maps of different classes may be different.\n\n    Args:\n        kwargs (dict): Arguments are the same as those in\n            :class:`AlignedAnchor3DRangeGenerator`.\n    \"\"\"\n\n    def __init__(self, **kwargs):\n        super(AlignedAnchor3DRangeGeneratorPerCls, self).__init__(**kwargs)\n        assert len(self.scales) == 1, 'Multi-scale feature map levels are' + \\\n            ' not supported currently in this kind of anchor generator.'\n\n    def grid_anchors(self, featmap_sizes, device='cuda'):\n        \"\"\"Generate grid anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes for\n                different classes in a single feature level.\n            device (str, optional): Device where the anchors will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            list[list[torch.Tensor]]: Anchors in multiple feature levels.\n                Note that in this anchor generator, we currently only\n                support single feature level. 
The sizes of each tensor\n                should be [num_sizes/ranges*num_rots*featmap_size,\n                box_code_size].\n        \"\"\"\n        multi_level_anchors = []\n        anchors = self.multi_cls_grid_anchors(\n            featmap_sizes, self.scales[0], device=device)\n        multi_level_anchors.append(anchors)\n        return multi_level_anchors\n\n    def multi_cls_grid_anchors(self, featmap_sizes, scale, device='cuda'):\n        \"\"\"Generate grid anchors of a single level feature map for multi-class\n        with different feature map sizes.\n\n        This function is usually called by method ``self.grid_anchors``.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes for\n                different classes in a single feature level.\n            scale (float): Scale factor of the anchors in the current level.\n            device (str, optional): Device the tensor will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors in the overall feature map.\n        \"\"\"\n        assert len(featmap_sizes) == len(self.sizes) == len(self.ranges), \\\n            'The number of different feature map sizes anchor sizes and ' + \\\n            'ranges should be the same.'\n\n        multi_cls_anchors = []\n        for i in range(len(featmap_sizes)):\n            anchors = self.anchors_single_range(\n                featmap_sizes[i],\n                self.ranges[i],\n                scale,\n                self.sizes[i],\n                self.rotations,\n                device=device)\n            # [*featmap_size, num_sizes/ranges, num_rots, box_code_size]\n            ndim = len(featmap_sizes[i])\n            anchors = anchors.view(*featmap_sizes[i], -1, anchors.size(-1))\n            # [*featmap_size, num_sizes/ranges*num_rots, box_code_size]\n            anchors = anchors.permute(ndim, *range(0, ndim), ndim + 1)\n            # [num_sizes/ranges*num_rots, *featmap_size, box_code_size]\n            multi_cls_anchors.append(anchors.reshape(-1, anchors.size(-1)))\n            # [num_sizes/ranges*num_rots*featmap_size, box_code_size]\n        return multi_cls_anchors\n"
  },
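For a single feature level, `grid_anchors` above returns one tensor per level whose rows are `(x, y, z, x_size, y_size, z_size, yaw)` boxes, extended by any `custom_values` (e.g. velocities on nuScenes). A minimal CPU shape check, using illustrative KITTI-style ranges rather than values from this repository's configs:

```python
# Minimal sketch: instantiate the range-based generator directly and check the
# output shape on CPU. Values below are illustrative placeholders.
from mmdet3d.core.anchor import Anchor3DRangeGenerator

gen = Anchor3DRangeGenerator(
    ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],  # (x_min, y_min, z_min, x_max, y_max, z_max)
    sizes=[[3.9, 1.6, 1.56]],                       # one anchor size, in (x, y, z)
    rotations=[0, 1.5707963])                       # two yaw angles per location

# One feature level with a 200 x 176 BEV feature map.
anchors = gen.grid_anchors([(200, 176)], device='cpu')
assert anchors[0].shape == (200 * 176 * 2, 7)       # H * W * num_base_anchors rows, 7-dim boxes
```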
  {
    "path": "mmdet3d/core/bbox/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .assigners import AssignResult, BaseAssigner, MaxIoUAssigner\nfrom .coders import DeltaXYZWLHRBBoxCoder\n# from .bbox_target import bbox_target\nfrom .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,\n                              BboxOverlapsNearest3D,\n                              axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,\n                              bbox_overlaps_nearest_3d)\nfrom .samplers import (BaseSampler, CombinedSampler,\n                       InstanceBalancedPosSampler, IoUBalancedNegSampler,\n                       PseudoSampler, RandomSampler, SamplingResult)\nfrom .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,\n                         Coord3DMode, DepthInstance3DBoxes,\n                         LiDARInstance3DBoxes, get_box_type, limit_period,\n                         mono_cam_box2vis, points_cam2img, points_img2cam,\n                         xywhr2xyxyr, CustomBox)\nfrom .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back\nfrom .util import *\n__all__ = [\n    'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner',\n    'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',\n    'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',\n    'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D',\n    'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d',\n    'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode',\n    'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',\n    'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',\n    'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',\n    'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis'\n]\n"
  },
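The box structures re-exported here are what the meshlab helpers earlier in this section operate on: predictions arrive as `LiDARInstance3DBoxes` (or camera/depth variants) and are moved between conventions via `Box3DMode`. A small sketch, assuming mmdet3d is installed; the box values are made up.

```python
# Sketch: construct a LiDAR-frame box and convert it to the depth convention
# used by the point-cloud visualizers. The numbers are made up.
import torch
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[10.0, 2.0, -1.5, 3.9, 1.6, 1.56, 0.3]]))  # (x, y, z, dx, dy, dz, yaw)

depth_boxes = boxes.convert_to(Box3DMode.DEPTH)
print(depth_boxes.tensor.shape)  # torch.Size([1, 7])

# The same conversion also accepts raw (N, 7) arrays, as in
# show_det_result_meshlab: Box3DMode.convert(arr, Box3DMode.LIDAR, Box3DMode.DEPTH)
```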
  {
    "path": "mmdet3d/core/bbox/assigners/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner\n\n__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']\n"
  },
  {
    "path": "mmdet3d/core/bbox/box_np_ops.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# TODO: clean the functions in this file and move the APIs into box structures\n# in the future\n# NOTICE: All functions in this file are valid for LiDAR or depth boxes only\n# if we use default parameters.\n\nimport numba\nimport numpy as np\n\nfrom .structures.utils import limit_period, points_cam2img, rotation_3d_in_axis\n\n\ndef camera_to_lidar(points, r_rect, velo2cam):\n    \"\"\"Convert points in camera coordinate to lidar coordinate.\n\n    Note:\n        This function is for KITTI only.\n\n    Args:\n        points (np.ndarray, shape=[N, 3]): Points in camera coordinate.\n        r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. CAM2) to CAM0.\n        velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n\n    Returns:\n        np.ndarray, shape=[N, 3]: Points in lidar coordinate.\n    \"\"\"\n    points_shape = list(points.shape[0:-1])\n    if points.shape[-1] == 3:\n        points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)\n    lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)\n    return lidar_points[..., :3]\n\n\ndef box_camera_to_lidar(data, r_rect, velo2cam):\n    \"\"\"Convert boxes in camera coordinate to lidar coordinate.\n\n    Note:\n        This function is for KITTI only.\n\n    Args:\n        data (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.\n        r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. CAM2) to CAM0.\n        velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n\n    Returns:\n        np.ndarray, shape=[N, 3]: Boxes in lidar coordinate.\n    \"\"\"\n    xyz = data[:, 0:3]\n    x_size, y_size, z_size = data[:, 3:4], data[:, 4:5], data[:, 5:6]\n    r = data[:, 6:7]\n    xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)\n    # yaw and dims also needs to be converted\n    r_new = -r - np.pi / 2\n    r_new = limit_period(r_new, period=np.pi * 2)\n    return np.concatenate([xyz_lidar, x_size, z_size, y_size, r_new], axis=1)\n\n\ndef corners_nd(dims, origin=0.5):\n    \"\"\"Generate relative box corners based on length per dim and origin point.\n\n    Args:\n        dims (np.ndarray, shape=[N, ndim]): Array of length per dim\n        origin (list or array or float, optional): origin point relate to\n            smallest point. 
Defaults to 0.5\n\n    Returns:\n        np.ndarray, shape=[N, 2 ** ndim, ndim]: Returned corners.\n        point layout example: (2d) x0y0, x0y1, x1y0, x1y1;\n            (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1\n            where x0 < x1, y0 < y1, z0 < z1.\n    \"\"\"\n    ndim = int(dims.shape[1])\n    corners_norm = np.stack(\n        np.unravel_index(np.arange(2**ndim), [2] * ndim),\n        axis=1).astype(dims.dtype)\n    # now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1\n    # (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1\n    # so need to convert to a format which is convenient to do other computing.\n    # for 2d boxes, format is clockwise start with minimum point\n    # for 3d boxes, please draw lines by your hand.\n    if ndim == 2:\n        # generate clockwise box corners\n        corners_norm = corners_norm[[0, 1, 3, 2]]\n    elif ndim == 3:\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n    corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)\n    corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(\n        [1, 2**ndim, ndim])\n    return corners\n\n\ndef center_to_corner_box2d(centers, dims, angles=None, origin=0.5):\n    \"\"\"Convert kitti locations, dimensions and angles to corners.\n    format: center(xy), dims(xy), angles(counterclockwise when positive)\n\n    Args:\n        centers (np.ndarray): Locations in kitti label file with shape (N, 2).\n        dims (np.ndarray): Dimensions in kitti label file with shape (N, 2).\n        angles (np.ndarray, optional): Rotation_y in kitti label file with\n            shape (N). Defaults to None.\n        origin (list or array or float, optional): origin point relate to\n            smallest point. 
Defaults to 0.5.\n\n    Returns:\n        np.ndarray: Corners with the shape of (N, 4, 2).\n    \"\"\"\n    # 'length' in kitti format is in x axis.\n    # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)\n    # center in kitti format is [0.5, 1.0, 0.5] in xyz.\n    corners = corners_nd(dims, origin=origin)\n    # corners: [N, 4, 2]\n    if angles is not None:\n        corners = rotation_3d_in_axis(corners, angles)\n    corners += centers.reshape([-1, 1, 2])\n    return corners\n\n\n@numba.jit(nopython=True)\ndef depth_to_points(depth, trunc_pixel):\n    \"\"\"Convert depth map to points.\n\n    Args:\n        depth (np.array, shape=[H, W]): Depth map which\n            the row of [0~`trunc_pixel`] are truncated.\n        trunc_pixel (int): The number of truncated row.\n\n    Returns:\n        np.ndarray: Points in camera coordinates.\n    \"\"\"\n    num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)\n    points = np.zeros((num_pts, 3), dtype=depth.dtype)\n    x = np.array([0, 0, 1], dtype=depth.dtype)\n    k = 0\n    for i in range(trunc_pixel, depth.shape[0]):\n        for j in range(depth.shape[1]):\n            if depth[i, j] > 0.1:\n                x = np.array([j, i, 1], dtype=depth.dtype)\n                points[k] = x * depth[i, j]\n                k += 1\n    return points\n\n\ndef depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):\n    \"\"\"Convert depth map to points in lidar coordinate.\n\n    Args:\n        depth (np.array, shape=[H, W]): Depth map which\n            the row of [0~`trunc_pixel`] are truncated.\n        trunc_pixel (int): The number of truncated row.\n        P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.\n        r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. CAM2) to CAM0.\n        velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n\n    Returns:\n        np.ndarray: Points in lidar coordinates.\n    \"\"\"\n    pts = depth_to_points(depth, trunc_pixel)\n    points_shape = list(pts.shape[0:-1])\n    points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)\n    points = points @ np.linalg.inv(P2.T)\n    lidar_points = camera_to_lidar(points, r_rect, velo2cam)\n    return lidar_points\n\n\ndef center_to_corner_box3d(centers,\n                           dims,\n                           angles=None,\n                           origin=(0.5, 1.0, 0.5),\n                           axis=1):\n    \"\"\"Convert kitti locations, dimensions and angles to corners.\n\n    Args:\n        centers (np.ndarray): Locations in kitti label file with shape (N, 3).\n        dims (np.ndarray): Dimensions in kitti label file with shape (N, 3).\n        angles (np.ndarray, optional): Rotation_y in kitti label file with\n            shape (N). Defaults to None.\n        origin (list or array or float, optional): Origin point relate to\n            smallest point. Use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0)\n            in lidar. Defaults to (0.5, 1.0, 0.5).\n        axis (int, optional): Rotation axis. 
1 for camera and 2 for lidar.\n            Defaults to 1.\n\n    Returns:\n        np.ndarray: Corners with the shape of (N, 8, 3).\n    \"\"\"\n    # 'length' in kitti format is in x axis.\n    # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(lwh)(lidar)\n    # center in kitti format is [0.5, 1.0, 0.5] in xyz.\n    corners = corners_nd(dims, origin=origin)\n    # corners: [N, 8, 3]\n    if angles is not None:\n        corners = rotation_3d_in_axis(corners, angles, axis=axis)\n    corners += centers.reshape([-1, 1, 3])\n    return corners\n\n\n@numba.jit(nopython=True)\ndef box2d_to_corner_jit(boxes):\n    \"\"\"Convert box2d to corner.\n\n    Args:\n        boxes (np.ndarray, shape=[N, 5]): Boxes2d with rotation.\n\n    Returns:\n        box_corners (np.ndarray, shape=[N, 4, 2]): Box corners.\n    \"\"\"\n    num_box = boxes.shape[0]\n    corners_norm = np.zeros((4, 2), dtype=boxes.dtype)\n    corners_norm[1, 1] = 1.0\n    corners_norm[2] = 1.0\n    corners_norm[3, 0] = 1.0\n    corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)\n    corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(\n        1, 4, 2)\n    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)\n    box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)\n    for i in range(num_box):\n        rot_sin = np.sin(boxes[i, -1])\n        rot_cos = np.cos(boxes[i, -1])\n        rot_mat_T[0, 0] = rot_cos\n        rot_mat_T[0, 1] = rot_sin\n        rot_mat_T[1, 0] = -rot_sin\n        rot_mat_T[1, 1] = rot_cos\n        box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]\n    return box_corners\n\n\n@numba.njit\ndef corner_to_standup_nd_jit(boxes_corner):\n    \"\"\"Convert boxes_corner to aligned (min-max) boxes.\n\n    Args:\n        boxes_corner (np.ndarray, shape=[N, 2**dim, dim]): Boxes corners.\n\n    Returns:\n        np.ndarray, shape=[N, dim*2]: Aligned (min-max) boxes.\n    \"\"\"\n    num_boxes = boxes_corner.shape[0]\n    ndim = boxes_corner.shape[-1]\n    result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)\n    for i in range(num_boxes):\n        for j in range(ndim):\n            result[i, j] = np.min(boxes_corner[i, :, j])\n        for j in range(ndim):\n            result[i, j + ndim] = np.max(boxes_corner[i, :, j])\n    return result\n\n\n@numba.jit(nopython=True)\ndef corner_to_surfaces_3d_jit(corners):\n    \"\"\"Convert 3d box corners from corner function above to surfaces that\n    normal vectors all direct to internal.\n\n    Args:\n        corners (np.ndarray): 3d box corners with the shape of (N, 8, 3).\n\n    Returns:\n        np.ndarray: Surfaces with the shape of (N, 6, 4, 3).\n    \"\"\"\n    # box_corners: [N, 8, 3], must from corner functions in this module\n    num_boxes = corners.shape[0]\n    surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)\n    corner_idxes = np.array([\n        0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7\n    ]).reshape(6, 4)\n    for i in range(num_boxes):\n        for j in range(6):\n            for k in range(4):\n                surfaces[i, j, k] = corners[i, corner_idxes[j, k]]\n    return surfaces\n\n\ndef rotation_points_single_angle(points, angle, axis=0):\n    \"\"\"Rotate points with a single angle.\n\n    Args:\n        points (np.ndarray, shape=[N, 3]]):\n        angle (np.ndarray, shape=[1]]):\n        axis (int, optional): Axis to rotate at. 
Defaults to 0.\n\n    Returns:\n        np.ndarray: Rotated points.\n    \"\"\"\n    # points: [N, 3]\n    rot_sin = np.sin(angle)\n    rot_cos = np.cos(angle)\n    if axis == 1:\n        rot_mat_T = np.array(\n            [[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]],\n            dtype=points.dtype)\n    elif axis == 2 or axis == -1:\n        rot_mat_T = np.array(\n            [[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]],\n            dtype=points.dtype)\n    elif axis == 0:\n        rot_mat_T = np.array(\n            [[1, 0, 0], [0, rot_cos, rot_sin], [0, -rot_sin, rot_cos]],\n            dtype=points.dtype)\n    else:\n        raise ValueError('axis should in range')\n\n    return points @ rot_mat_T, rot_mat_T\n\n\ndef box3d_to_bbox(box3d, P2):\n    \"\"\"Convert box3d in camera coordinates to bbox in image coordinates.\n\n    Args:\n        box3d (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.\n        P2 (np.array, shape=[4, 4]): Intrinsics of Camera2.\n\n    Returns:\n        np.ndarray, shape=[N, 4]: Boxes 2d in image coordinates.\n    \"\"\"\n    box_corners = center_to_corner_box3d(\n        box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)\n    box_corners_in_image = points_cam2img(box_corners, P2)\n    # box_corners_in_image: [N, 8, 2]\n    minxy = np.min(box_corners_in_image, axis=1)\n    maxxy = np.max(box_corners_in_image, axis=1)\n    bbox = np.concatenate([minxy, maxxy], axis=1)\n    return bbox\n\n\ndef corner_to_surfaces_3d(corners):\n    \"\"\"convert 3d box corners from corner function above to surfaces that\n    normal vectors all direct to internal.\n\n    Args:\n        corners (np.ndarray): 3D box corners with shape of (N, 8, 3).\n\n    Returns:\n        np.ndarray: Surfaces with the shape of (N, 6, 4, 3).\n    \"\"\"\n    # box_corners: [N, 8, 3], must from corner functions in this module\n    surfaces = np.array([\n        [corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],\n        [corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],\n        [corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],\n        [corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],\n        [corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],\n        [corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],\n    ]).transpose([2, 0, 1, 3])\n    return surfaces\n\n\ndef points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):\n    \"\"\"Check points in rotated bbox and return indices.\n\n    Note:\n        This function is for counterclockwise boxes.\n\n    Args:\n        points (np.ndarray, shape=[N, 3+dim]): Points to query.\n        rbbox (np.ndarray, shape=[M, 7]): Boxes3d with rotation.\n        z_axis (int, optional): Indicate which axis is height.\n            Defaults to 2.\n        origin (tuple[int], optional): Indicate the position of\n            box center. 
Defaults to (0.5, 0.5, 0).\n\n    Returns:\n        np.ndarray, shape=[N, M]: Indices of points in each box.\n    \"\"\"\n    # TODO: this function is different from PointCloud3D, be careful\n    # when start to use nuscene, check the input\n    rbbox_corners = center_to_corner_box3d(\n        rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)\n    surfaces = corner_to_surfaces_3d(rbbox_corners)\n    indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)\n    return indices\n\n\ndef minmax_to_corner_2d(minmax_box):\n    \"\"\"Convert minmax box to corners2d.\n\n    Args:\n        minmax_box (np.ndarray, shape=[N, dims]): minmax boxes.\n\n    Returns:\n        np.ndarray: 2d corners of boxes\n    \"\"\"\n    ndim = minmax_box.shape[-1] // 2\n    center = minmax_box[..., :ndim]\n    dims = minmax_box[..., ndim:] - center\n    return center_to_corner_box2d(center, dims, origin=0.0)\n\n\ndef create_anchors_3d_range(feature_size,\n                            anchor_range,\n                            sizes=((3.9, 1.6, 1.56), ),\n                            rotations=(0, np.pi / 2),\n                            dtype=np.float32):\n    \"\"\"Create anchors 3d by range.\n\n    Args:\n        feature_size (list[float] | tuple[float]): Feature map size. It is\n            either a list of a tuple of [D, H, W](in order of z, y, and x).\n        anchor_range (torch.Tensor | list[float]): Range of anchors with\n            shape [6]. The order is consistent with that of anchors, i.e.,\n            (x_min, y_min, z_min, x_max, y_max, z_max).\n        sizes (list[list] | np.ndarray | torch.Tensor, optional):\n            Anchor size with shape [N, 3], in order of x, y, z.\n            Defaults to ((3.9, 1.6, 1.56), ).\n        rotations (list[float] | np.ndarray | torch.Tensor, optional):\n            Rotations of anchors in a single feature grid.\n            Defaults to (0, np.pi / 2).\n        dtype (type, optional): Data type. Defaults to np.float32.\n\n    Returns:\n        np.ndarray: Range based anchors with shape of\n            (*feature_size, num_sizes, num_rots, 7).\n    \"\"\"\n    anchor_range = np.array(anchor_range, dtype)\n    z_centers = np.linspace(\n        anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)\n    y_centers = np.linspace(\n        anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)\n    x_centers = np.linspace(\n        anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)\n    sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])\n    rotations = np.array(rotations, dtype=dtype)\n    rets = np.meshgrid(\n        x_centers, y_centers, z_centers, rotations, indexing='ij')\n    tile_shape = [1] * 5\n    tile_shape[-2] = int(sizes.shape[0])\n    for i in range(len(rets)):\n        rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)\n        rets[i] = rets[i][..., np.newaxis]  # for concat\n    sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])\n    tile_size_shape = list(rets[0].shape)\n    tile_size_shape[3] = 1\n    sizes = np.tile(sizes, tile_size_shape)\n    rets.insert(3, sizes)\n    ret = np.concatenate(rets, axis=-1)\n    return np.transpose(ret, [2, 1, 0, 3, 4, 5])\n\n\ndef center_to_minmax_2d(centers, dims, origin=0.5):\n    \"\"\"Center to minmax.\n\n    Args:\n        centers (np.ndarray): Center points.\n        dims (np.ndarray): Dimensions.\n        origin (list or array or float, optional): Origin point relate\n            to smallest point. 
Defaults to 0.5.\n\n    Returns:\n        np.ndarray: Minmax points.\n    \"\"\"\n    if origin == 0.5:\n        return np.concatenate([centers - dims / 2, centers + dims / 2],\n                              axis=-1)\n    corners = center_to_corner_box2d(centers, dims, origin=origin)\n    return corners[:, [0, 2]].reshape([-1, 4])\n\n\ndef rbbox2d_to_near_bbox(rbboxes):\n    \"\"\"convert rotated bbox to nearest 'standing' or 'lying' bbox.\n\n    Args:\n        rbboxes (np.ndarray): Rotated bboxes with shape of\n            (N, 5(x, y, xdim, ydim, rad)).\n\n    Returns:\n        np.ndarray: Bounding boxes with the shape of\n            (N, 4(xmin, ymin, xmax, ymax)).\n    \"\"\"\n    rots = rbboxes[..., -1]\n    rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))\n    cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]\n    bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])\n    bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])\n    return bboxes\n\n\n@numba.jit(nopython=True)\ndef iou_jit(boxes, query_boxes, mode='iou', eps=0.0):\n    \"\"\"Calculate box iou. Note that jit version runs ~10x faster than the\n    box_overlaps function in mmdet3d.core.evaluation.\n\n    Note:\n        This function is for counterclockwise boxes.\n\n    Args:\n        boxes (np.ndarray): Input bounding boxes with shape of (N, 4).\n        query_boxes (np.ndarray): Query boxes with shape of (K, 4).\n        mode (str, optional): IoU mode. Defaults to 'iou'.\n        eps (float, optional): Value added to denominator. Defaults to 0.\n\n    Returns:\n        np.ndarray: Overlap between boxes and query_boxes\n            with the shape of [N, K].\n    \"\"\"\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    overlaps = np.zeros((N, K), dtype=boxes.dtype)\n    for k in range(K):\n        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *\n                    (query_boxes[k, 3] - query_boxes[k, 1] + eps))\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]) + eps)\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]) + eps)\n                if ih > 0:\n                    if mode == 'iou':\n                        ua = ((boxes[n, 2] - boxes[n, 0] + eps) *\n                              (boxes[n, 3] - boxes[n, 1] + eps) + box_area -\n                              iw * ih)\n                    else:\n                        ua = ((boxes[n, 2] - boxes[n, 0] + eps) *\n                              (boxes[n, 3] - boxes[n, 1] + eps))\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps\n\n\ndef projection_matrix_to_CRT_kitti(proj):\n    \"\"\"Split projection matrix of KITTI.\n\n    Note:\n        This function is for KITTI only.\n\n    P = C @ [R|T]\n    C is upper triangular matrix, so we need to inverse CR and use QR\n    stable for all kitti camera projection matrix.\n\n    Args:\n        proj (p.array, shape=[4, 4]): Intrinsics of camera.\n\n    Returns:\n        tuple[np.ndarray]: Splited matrix of C, R and T.\n    \"\"\"\n\n    CR = proj[0:3, 0:3]\n    CT = proj[0:3, 3]\n    RinvCinv = np.linalg.inv(CR)\n    Rinv, Cinv = np.linalg.qr(RinvCinv)\n    C = np.linalg.inv(Cinv)\n    R = np.linalg.inv(Rinv)\n    T = Cinv @ CT\n    return C, R, T\n\n\ndef remove_outside_points(points, rect, Trv2c, P2, image_shape):\n    
\"\"\"Remove points which are outside of image.\n\n    Note:\n        This function is for KITTI only.\n\n    Args:\n        points (np.ndarray, shape=[N, 3+dims]): Total points.\n        rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. CAM2) to CAM0.\n        Trv2c (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n        P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.\n        image_shape (list[int]): Shape of image.\n\n    Returns:\n        np.ndarray, shape=[N, 3+dims]: Filtered points.\n    \"\"\"\n    # 5x faster than remove_outside_points_v1(2ms vs 10ms)\n    C, R, T = projection_matrix_to_CRT_kitti(P2)\n    image_bbox = [0, 0, image_shape[1], image_shape[0]]\n    frustum = get_frustum(image_bbox, C)\n    frustum -= T\n    frustum = np.linalg.inv(R) @ frustum.T\n    frustum = camera_to_lidar(frustum.T, rect, Trv2c)\n    frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])\n    indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)\n    points = points[indices.reshape([-1])]\n    return points\n\n\ndef get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):\n    \"\"\"Get frustum corners in camera coordinates.\n\n    Args:\n        bbox_image (list[int]): box in image coordinates.\n        C (np.ndarray): Intrinsics.\n        near_clip (float, optional): Nearest distance of frustum.\n            Defaults to 0.001.\n        far_clip (float, optional): Farthest distance of frustum.\n            Defaults to 100.\n\n    Returns:\n        np.ndarray, shape=[8, 3]: coordinates of frustum corners.\n    \"\"\"\n    fku = C[0, 0]\n    fkv = -C[1, 1]\n    u0v0 = C[0:2, 2]\n    z_points = np.array(\n        [near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]\n    b = bbox_image\n    box_corners = np.array(\n        [[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],\n        dtype=C.dtype)\n    near_box_corners = (box_corners - u0v0) / np.array(\n        [fku / near_clip, -fkv / near_clip], dtype=C.dtype)\n    far_box_corners = (box_corners - u0v0) / np.array(\n        [fku / far_clip, -fkv / far_clip], dtype=C.dtype)\n    ret_xy = np.concatenate([near_box_corners, far_box_corners],\n                            axis=0)  # [8, 2]\n    ret_xyz = np.concatenate([ret_xy, z_points], axis=1)\n    return ret_xyz\n\n\ndef surface_equ_3d(polygon_surfaces):\n    \"\"\"\n\n    Args:\n        polygon_surfaces (np.ndarray): Polygon surfaces with shape of\n            [num_polygon, max_num_surfaces, max_num_points_of_surface, 3].\n            All surfaces' normal vector must direct to internal.\n            Max_num_points_of_surface must at least 3.\n\n    Returns:\n        tuple: normal vector and its direction.\n    \"\"\"\n    # return [a, b, c], d in ax+by+cz+d=0\n    # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]\n    surface_vec = polygon_surfaces[:, :, :2, :] - \\\n        polygon_surfaces[:, :, 1:3, :]\n    # normal_vec: [..., 3]\n    normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])\n    # print(normal_vec.shape, points[..., 0, :].shape)\n    # d = -np.inner(normal_vec, points[..., 0, :])\n    d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])\n    return normal_vec, -d\n\n\n@numba.njit\ndef _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,\n                                     num_surfaces):\n    \"\"\"\n    Args:\n        points 
(np.ndarray): Input points with shape of (num_points, 3).\n        polygon_surfaces (np.ndarray): Polygon surfaces with shape of\n            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).\n            All surfaces' normal vector must direct to internal.\n            Max_num_points_of_surface must at least 3.\n        normal_vec (np.ndarray): Normal vector of polygon_surfaces.\n        d (int): Directions of normal vector.\n        num_surfaces (np.ndarray): Number of surfaces a polygon contains\n            shape of (num_polygon).\n\n    Returns:\n        np.ndarray: Result matrix with the shape of [num_points, num_polygon].\n    \"\"\"\n    max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]\n    num_points = points.shape[0]\n    num_polygons = polygon_surfaces.shape[0]\n    ret = np.ones((num_points, num_polygons), dtype=np.bool_)\n    sign = 0.0\n    for i in range(num_points):\n        for j in range(num_polygons):\n            for k in range(max_num_surfaces):\n                if k > num_surfaces[j]:\n                    break\n                sign = (\n                    points[i, 0] * normal_vec[j, k, 0] +\n                    points[i, 1] * normal_vec[j, k, 1] +\n                    points[i, 2] * normal_vec[j, k, 2] + d[j, k])\n                if sign >= 0:\n                    ret[i, j] = False\n                    break\n    return ret\n\n\ndef points_in_convex_polygon_3d_jit(points,\n                                    polygon_surfaces,\n                                    num_surfaces=None):\n    \"\"\"Check points is in 3d convex polygons.\n\n    Args:\n        points (np.ndarray): Input points with shape of (num_points, 3).\n        polygon_surfaces (np.ndarray): Polygon surfaces with shape of\n            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).\n            All surfaces' normal vector must direct to internal.\n            Max_num_points_of_surface must at least 3.\n        num_surfaces (np.ndarray, optional): Number of surfaces a polygon\n            contains shape of (num_polygon). Defaults to None.\n\n    Returns:\n        np.ndarray: Result matrix with the shape of [num_points, num_polygon].\n    \"\"\"\n    max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]\n    # num_points = points.shape[0]\n    num_polygons = polygon_surfaces.shape[0]\n    if num_surfaces is None:\n        num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)\n    normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])\n    # normal_vec: [num_polygon, max_num_surfaces, 3]\n    # d: [num_polygon, max_num_surfaces]\n    return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,\n                                            normal_vec, d, num_surfaces)\n\n\n@numba.njit\ndef points_in_convex_polygon_jit(points, polygon, clockwise=False):\n    \"\"\"Check points is in 2d convex polygons. True when point in polygon.\n\n    Args:\n        points (np.ndarray): Input points with the shape of [num_points, 2].\n        polygon (np.ndarray): Input polygon with the shape of\n            [num_polygon, num_points_of_polygon, 2].\n        clockwise (bool, optional): Indicate polygon is clockwise. 
Defaults\n            to True.\n\n    Returns:\n        np.ndarray: Result matrix with the shape of [num_points, num_polygon].\n    \"\"\"\n    # first convert polygon to directed lines\n    num_points_of_polygon = polygon.shape[1]\n    num_points = points.shape[0]\n    num_polygons = polygon.shape[0]\n    # vec for all the polygons\n    if clockwise:\n        vec1 = polygon - polygon[:,\n                                 np.array([num_points_of_polygon - 1] + list(\n                                     range(num_points_of_polygon - 1))), :]\n    else:\n        vec1 = polygon[:,\n                       np.array([num_points_of_polygon - 1] +\n                                list(range(num_points_of_polygon -\n                                           1))), :] - polygon\n    ret = np.zeros((num_points, num_polygons), dtype=np.bool_)\n    success = True\n    cross = 0.0\n    for i in range(num_points):\n        for j in range(num_polygons):\n            success = True\n            for k in range(num_points_of_polygon):\n                vec = vec1[j, k]\n                cross = vec[1] * (polygon[j, k, 0] - points[i, 0])\n                cross -= vec[0] * (polygon[j, k, 1] - points[i, 1])\n                if cross >= 0:\n                    success = False\n                    break\n            ret[i, j] = success\n    return ret\n\n\ndef boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):\n    \"\"\"Convert kitti center boxes to corners.\n\n        7 -------- 4\n       /|         /|\n      6 -------- 5 .\n      | |        | |\n      . 3 -------- 0\n      |/         |/\n      2 -------- 1\n\n    Note:\n        This function is for LiDAR boxes only.\n\n    Args:\n        boxes3d (np.ndarray): Boxes with shape of (N, 7)\n            [x, y, z, x_size, y_size, z_size, ry] in LiDAR coords,\n            see the definition of ry in KITTI dataset.\n        bottom_center (bool, optional): Whether z is on the bottom center\n            of object. 
Defaults to True.\n\n    Returns:\n        np.ndarray: Box corners with the shape of [N, 8, 3].\n    \"\"\"\n    boxes_num = boxes3d.shape[0]\n    x_size, y_size, z_size = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]\n    x_corners = np.array([\n        x_size / 2., -x_size / 2., -x_size / 2., x_size / 2., x_size / 2.,\n        -x_size / 2., -x_size / 2., x_size / 2.\n    ],\n                         dtype=np.float32).T\n    y_corners = np.array([\n        -y_size / 2., -y_size / 2., y_size / 2., y_size / 2., -y_size / 2.,\n        -y_size / 2., y_size / 2., y_size / 2.\n    ],\n                         dtype=np.float32).T\n    if bottom_center:\n        z_corners = np.zeros((boxes_num, 8), dtype=np.float32)\n        z_corners[:, 4:8] = z_size.reshape(boxes_num, 1).repeat(\n            4, axis=1)  # (N, 8)\n    else:\n        z_corners = np.array([\n            -z_size / 2., -z_size / 2., -z_size / 2., -z_size / 2.,\n            z_size / 2., z_size / 2., z_size / 2., z_size / 2.\n        ],\n                             dtype=np.float32).T\n\n    ry = boxes3d[:, 6]\n    zeros, ones = np.zeros(\n        ry.size, dtype=np.float32), np.ones(\n            ry.size, dtype=np.float32)\n    rot_list = np.array([[np.cos(ry), np.sin(ry), zeros],\n                         [-np.sin(ry), np.cos(ry), zeros],\n                         [zeros, zeros, ones]])  # (3, 3, N)\n    R_list = np.transpose(rot_list, (2, 0, 1))  # (N, 3, 3)\n\n    temp_corners = np.concatenate((x_corners.reshape(\n        -1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),\n                                  axis=2)  # (N, 8, 3)\n    rotated_corners = np.matmul(temp_corners, R_list)  # (N, 8, 3)\n    x_corners = rotated_corners[:, :, 0]\n    y_corners = rotated_corners[:, :, 1]\n    z_corners = rotated_corners[:, :, 2]\n\n    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]\n\n    x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)\n    y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)\n    z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)\n\n    corners = np.concatenate(\n        (x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),\n        axis=2)\n\n    return corners.astype(np.float32)\n"
  },
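The numpy box utilities above share one convention: a 3D box is `(x, y, z, x_size, y_size, z_size, yaw)` and the corner ordering always comes from `corners_nd`. Below is a minimal sketch of how `center_to_corner_box3d` and `points_in_rbbox` combine for LiDAR boxes; the import path is an assumption based on this repo's layout, not something stated in the file itself.

```python
import numpy as np

# Assumed import path; adjust to wherever box_np_ops lives in this repo.
from mmdet3d.core.bbox import box_np_ops

# One LiDAR box: center (2, 0, 0), size 4 x 2 x 1.5 m, yaw = 90 degrees.
boxes = np.array([[2.0, 0.0, 0.0, 4.0, 2.0, 1.5, np.pi / 2]], dtype=np.float32)

# (N, 8, 3) corners; origin=(0.5, 0.5, 0) places z at the box bottom (LiDAR convention).
corners = box_np_ops.center_to_corner_box3d(
    boxes[:, :3], boxes[:, 3:6], boxes[:, 6], origin=(0.5, 0.5, 0), axis=2)

# (num_points, num_boxes) boolean mask of points falling inside each box.
points = np.array([[2.0, 0.0, 0.5], [10.0, 10.0, 0.5]], dtype=np.float32)
mask = box_np_ops.points_in_rbbox(points, boxes)
print(corners.shape, mask[:, 0])  # (1, 8, 3) [ True False]
```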
  {
    "path": "mmdet3d/core/bbox/coders/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.core.bbox import build_bbox_coder\nfrom .anchor_free_bbox_coder import AnchorFreeBBoxCoder\nfrom .centerpoint_bbox_coders import CenterPointBBoxCoder\nfrom .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder\nfrom .fcos3d_bbox_coder import FCOS3DBBoxCoder\nfrom .groupfree3d_bbox_coder import GroupFree3DBBoxCoder\nfrom .monoflex_bbox_coder import MonoFlexCoder\nfrom .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder\nfrom .pgd_bbox_coder import PGDBBoxCoder\nfrom .point_xyzwhlr_bbox_coder import PointXYZWHLRBBoxCoder\nfrom .smoke_bbox_coder import SMOKECoder\n\n__all__ = [\n    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder',\n    'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'GroupFree3DBBoxCoder',\n    'PointXYZWHLRBBoxCoder', 'FCOS3DBBoxCoder', 'PGDBBoxCoder', 'SMOKECoder',\n    'MonoFlexCoder'\n]\n"
  },
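Because every coder below registers itself in `BBOX_CODERS`, configs never instantiate these classes directly; they go through `build_bbox_coder`. A small illustrative snippet (the config keys simply mirror the constructor arguments of the chosen coder):

```python
from mmdet3d.core.bbox.coders import build_bbox_coder

# Build a coder from a plain config dict, the way mmdet3d configs do.
bbox_coder = build_bbox_coder(dict(type='DeltaXYZWLHRBBoxCoder', code_size=7))
print(type(bbox_coder).__name__)  # DeltaXYZWLHRBBoxCoder
```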
  {
    "path": "mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass AnchorFreeBBoxCoder(PartialBinBasedBBoxCoder):\n    \"\"\"Anchor free bbox coder for 3D boxes.\n\n    Args:\n        num_dir_bins (int): Number of bins to encode direction angle.\n        with_rot (bool): Whether the bbox is with rotation.\n    \"\"\"\n\n    def __init__(self, num_dir_bins, with_rot=True):\n        super(AnchorFreeBBoxCoder, self).__init__(\n            num_dir_bins, 0, [], with_rot=with_rot)\n        self.num_dir_bins = num_dir_bins\n        self.with_rot = with_rot\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes\n                with shape (n, 7).\n            gt_labels_3d (torch.Tensor): Ground truth classes.\n\n        Returns:\n            tuple: Targets of center, size and direction.\n        \"\"\"\n        # generate center target\n        center_target = gt_bboxes_3d.gravity_center\n\n        # generate bbox size target\n        size_res_target = gt_bboxes_3d.dims / 2\n\n        # generate dir target\n        box_num = gt_labels_3d.shape[0]\n        if self.with_rot:\n            (dir_class_target,\n             dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)\n            dir_res_target /= (2 * np.pi / self.num_dir_bins)\n        else:\n            dir_class_target = gt_labels_3d.new_zeros(box_num)\n            dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)\n\n        return (center_target, size_res_target, dir_class_target,\n                dir_res_target)\n\n    def decode(self, bbox_out):\n        \"\"\"Decode predicted parts to bbox3d.\n\n        Args:\n            bbox_out (dict): Predictions from model, should contain keys below.\n\n                - center: predicted bottom center of bboxes.\n                - dir_class: predicted bbox direction class.\n                - dir_res: predicted bbox direction residual.\n                - size: predicted bbox size.\n\n        Returns:\n            torch.Tensor: Decoded bbox3d with shape (batch, n, 7).\n        \"\"\"\n        center = bbox_out['center']\n        batch_size, num_proposal = center.shape[:2]\n\n        # decode heading angle\n        if self.with_rot:\n            dir_class = torch.argmax(bbox_out['dir_class'], -1)\n            dir_res = torch.gather(bbox_out['dir_res'], 2,\n                                   dir_class.unsqueeze(-1))\n            dir_res.squeeze_(2)\n            dir_angle = self.class2angle(dir_class, dir_res).reshape(\n                batch_size, num_proposal, 1)\n        else:\n            dir_angle = center.new_zeros(batch_size, num_proposal, 1)\n\n        # decode bbox size\n        bbox_size = torch.clamp(bbox_out['size'] * 2, min=0.1)\n\n        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)\n        return bbox3d\n\n    def split_pred(self, cls_preds, reg_preds, base_xyz):\n        \"\"\"Split predicted features to specific parts.\n\n        Args:\n            cls_preds (torch.Tensor): Class predicted features to split.\n            reg_preds (torch.Tensor): Regression predicted features to split.\n            base_xyz (torch.Tensor): Coordinates of points.\n\n        Returns:\n            dict[str, torch.Tensor]: Split results.\n        \"\"\"\n    
    results = {}\n        results['obj_scores'] = cls_preds\n\n        start, end = 0, 0\n        reg_preds_trans = reg_preds.transpose(2, 1)\n\n        # decode center\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results['center_offset'] = reg_preds_trans[..., start:end]\n        results['center'] = base_xyz.detach() + reg_preds_trans[..., start:end]\n        start = end\n\n        # decode center\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results['size'] = reg_preds_trans[..., start:end]\n        start = end\n\n        # decode direction\n        end += self.num_dir_bins\n        results['dir_class'] = reg_preds_trans[..., start:end]\n        start = end\n\n        end += self.num_dir_bins\n        dir_res_norm = reg_preds_trans[..., start:end]\n        start = end\n\n        results['dir_res_norm'] = dir_res_norm\n        results['dir_res'] = dir_res_norm * (2 * np.pi / self.num_dir_bins)\n\n        return results\n"
  },
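`AnchorFreeBBoxCoder.decode` only needs the four prediction tensors named in its docstring, so it can be exercised with random inputs. A hedged sketch with made-up shapes (1 sample, 4 proposals, 12 direction bins):

```python
import torch
from mmdet3d.core.bbox.coders import AnchorFreeBBoxCoder

coder = AnchorFreeBBoxCoder(num_dir_bins=12, with_rot=True)

# Fake network outputs for a batch of 1 with 4 proposals.
bbox_out = dict(
    center=torch.rand(1, 4, 3),      # bottom centers
    dir_class=torch.rand(1, 4, 12),  # direction bin logits
    dir_res=torch.rand(1, 4, 12),    # per-bin residual angles
    size=torch.rand(1, 4, 3))        # half-sizes (decode multiplies by 2)
bbox3d = coder.decode(bbox_out)
print(bbox3d.shape)  # (1, 4, 7): x, y, z, dx, dy, dz, yaw
```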
  {
    "path": "mmdet3d/core/bbox/coders/centerpoint_bbox_coders.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass CenterPointBBoxCoder(BaseBBoxCoder):\n    \"\"\"Bbox coder for CenterPoint.\n\n    Args:\n        pc_range (list[float]): Range of point cloud.\n        out_size_factor (int): Downsample factor of the model.\n        voxel_size (list[float]): Size of voxel.\n        post_center_range (list[float], optional): Limit of the center.\n            Default: None.\n        max_num (int, optional): Max number to be kept. Default: 100.\n        score_threshold (float, optional): Threshold to filter boxes\n            based on score. Default: None.\n        code_size (int, optional): Code size of bboxes. Default: 9\n    \"\"\"\n\n    def __init__(self,\n                 pc_range,\n                 out_size_factor,\n                 voxel_size,\n                 post_center_range=None,\n                 max_num=100,\n                 score_threshold=None,\n                 code_size=9):\n\n        self.pc_range = pc_range\n        self.out_size_factor = out_size_factor\n        self.voxel_size = voxel_size\n        self.post_center_range = post_center_range\n        self.max_num = max_num\n        self.score_threshold = score_threshold\n        self.code_size = code_size\n\n    def _gather_feat(self, feats, inds, feat_masks=None):\n        \"\"\"Given feats and indexes, returns the gathered feats.\n\n        Args:\n            feats (torch.Tensor): Features to be transposed and gathered\n                with the shape of [B, 2, W, H].\n            inds (torch.Tensor): Indexes with the shape of [B, N].\n            feat_masks (torch.Tensor, optional): Mask of the feats.\n                Default: None.\n\n        Returns:\n            torch.Tensor: Gathered feats.\n        \"\"\"\n        dim = feats.size(2)\n        inds = inds.unsqueeze(2).expand(inds.size(0), inds.size(1), dim)\n        feats = feats.gather(1, inds)\n        if feat_masks is not None:\n            feat_masks = feat_masks.unsqueeze(2).expand_as(feats)\n            feats = feats[feat_masks]\n            feats = feats.view(-1, dim)\n        return feats\n\n    def _topk(self, scores, K=80):\n        \"\"\"Get indexes based on scores.\n\n        Args:\n            scores (torch.Tensor): scores with the shape of [B, N, W, H].\n            K (int, optional): Number to be kept. 
Defaults to 80.\n\n        Returns:\n            tuple[torch.Tensor]\n                torch.Tensor: Selected scores with the shape of [B, K].\n                torch.Tensor: Selected indexes with the shape of [B, K].\n                torch.Tensor: Selected classes with the shape of [B, K].\n                torch.Tensor: Selected y coord with the shape of [B, K].\n                torch.Tensor: Selected x coord with the shape of [B, K].\n        \"\"\"\n        batch, cat, height, width = scores.size()\n\n        topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)\n\n        topk_inds = topk_inds % (height * width)\n        topk_ys = (topk_inds.float() /\n                   torch.tensor(width, dtype=torch.float)).int().float()\n        topk_xs = (topk_inds % width).int().float()\n\n        topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)\n        topk_clses = (topk_ind / torch.tensor(K, dtype=torch.float)).int()\n        topk_inds = self._gather_feat(topk_inds.view(batch, -1, 1),\n                                      topk_ind).view(batch, K)\n        topk_ys = self._gather_feat(topk_ys.view(batch, -1, 1),\n                                    topk_ind).view(batch, K)\n        topk_xs = self._gather_feat(topk_xs.view(batch, -1, 1),\n                                    topk_ind).view(batch, K)\n\n        return topk_score, topk_inds, topk_clses, topk_ys, topk_xs\n\n    def _transpose_and_gather_feat(self, feat, ind):\n        \"\"\"Given feats and indexes, returns the transposed and gathered feats.\n\n        Args:\n            feat (torch.Tensor): Features to be transposed and gathered\n                with the shape of [B, 2, W, H].\n            ind (torch.Tensor): Indexes with the shape of [B, N].\n\n        Returns:\n            torch.Tensor: Transposed and gathered feats.\n        \"\"\"\n        feat = feat.permute(0, 2, 3, 1).contiguous()\n        feat = feat.view(feat.size(0), -1, feat.size(3))\n        feat = self._gather_feat(feat, ind)\n        return feat\n\n    def encode(self):\n        pass\n\n    def decode(self,\n               heat,\n               rot_sine,\n               rot_cosine,\n               hei,\n               dim,\n               vel,\n               reg=None,\n               task_id=-1):\n        \"\"\"Decode bboxes.\n\n        Args:\n            heat (torch.Tensor): Heatmap with the shape of [B, N, W, H].\n            rot_sine (torch.Tensor): Sine of rotation with the shape of\n                [B, 1, W, H].\n            rot_cosine (torch.Tensor): Cosine of rotation with the shape of\n                [B, 1, W, H].\n            hei (torch.Tensor): Height of the boxes with the shape\n                of [B, 1, W, H].\n            dim (torch.Tensor): Dim of the boxes with the shape of\n                [B, 1, W, H].\n            vel (torch.Tensor): Velocity with the shape of [B, 1, W, H].\n            reg (torch.Tensor, optional): Regression value of the boxes in\n                2D with the shape of [B, 2, W, H]. Default: None.\n            task_id (int, optional): Index of task. 
Default: -1.\n\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        batch, cat, _, _ = heat.size()\n\n        scores, inds, clses, ys, xs = self._topk(heat, K=self.max_num)\n\n        if reg is not None:\n            reg = self._transpose_and_gather_feat(reg, inds)\n            reg = reg.view(batch, self.max_num, 2)\n            xs = xs.view(batch, self.max_num, 1) + reg[:, :, 0:1]\n            ys = ys.view(batch, self.max_num, 1) + reg[:, :, 1:2]\n        else:\n            xs = xs.view(batch, self.max_num, 1) + 0.5\n            ys = ys.view(batch, self.max_num, 1) + 0.5\n\n        # rotation value and direction label\n        rot_sine = self._transpose_and_gather_feat(rot_sine, inds)\n        rot_sine = rot_sine.view(batch, self.max_num, 1)\n\n        rot_cosine = self._transpose_and_gather_feat(rot_cosine, inds)\n        rot_cosine = rot_cosine.view(batch, self.max_num, 1)\n        rot = torch.atan2(rot_sine, rot_cosine)\n\n        # height in the bev\n        hei = self._transpose_and_gather_feat(hei, inds)\n        hei = hei.view(batch, self.max_num, 1)\n\n        # dim of the box\n        dim = self._transpose_and_gather_feat(dim, inds)\n        dim = dim.view(batch, self.max_num, 3)\n\n        # class label\n        clses = clses.view(batch, self.max_num).float()\n        scores = scores.view(batch, self.max_num)\n\n        xs = xs.view(\n            batch, self.max_num,\n            1) * self.out_size_factor * self.voxel_size[0] + self.pc_range[0]\n        ys = ys.view(\n            batch, self.max_num,\n            1) * self.out_size_factor * self.voxel_size[1] + self.pc_range[1]\n\n        if vel is None:  # KITTI FORMAT\n            final_box_preds = torch.cat([xs, ys, hei, dim, rot], dim=2)\n        else:  # exist velocity, nuscene format\n            vel = self._transpose_and_gather_feat(vel, inds)\n            vel = vel.view(batch, self.max_num, 2)\n            final_box_preds = torch.cat([xs, ys, hei, dim, rot, vel], dim=2)\n\n        final_scores = scores\n        final_preds = clses\n\n        # use score threshold\n        if self.score_threshold is not None:\n            thresh_mask = final_scores > self.score_threshold\n\n        if self.post_center_range is not None:\n            self.post_center_range = torch.tensor(\n                self.post_center_range, device=heat.device)\n            mask = (final_box_preds[..., :3] >=\n                    self.post_center_range[:3]).all(2)\n            mask &= (final_box_preds[..., :3] <=\n                     self.post_center_range[3:]).all(2)\n\n            predictions_dicts = []\n            for i in range(batch):\n                cmask = mask[i, :]\n                if self.score_threshold:\n                    cmask &= thresh_mask[i]\n\n                boxes3d = final_box_preds[i, cmask]\n                scores = final_scores[i, cmask]\n                labels = final_preds[i, cmask]\n                predictions_dict = {\n                    'bboxes': boxes3d,\n                    'scores': scores,\n                    'labels': labels\n                }\n\n                predictions_dicts.append(predictions_dict)\n        else:\n            raise NotImplementedError(\n                'Need to reorganize output as a batch, only '\n                'support post_center_range is not None for now!')\n\n        return predictions_dicts\n"
  },
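`CenterPointBBoxCoder.decode` consumes the raw heatmap and regression maps of one task head and returns per-sample prediction dicts. A runnable sketch with random feature maps; the ranges and sizes below are illustrative values, not this repo's configs:

```python
import torch
from mmdet3d.core.bbox.coders import CenterPointBBoxCoder

coder = CenterPointBBoxCoder(
    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],  # decode uses only the x/y mins
    out_size_factor=8,
    voxel_size=[0.2, 0.2],
    post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
    max_num=50,
    score_threshold=0.1,
    code_size=9)

B, H, W = 1, 16, 16
heat = torch.rand(B, 10, H, W)                       # 10-class heatmap
rot_sine, rot_cosine = torch.rand(B, 1, H, W), torch.rand(B, 1, H, W)
hei, dim = torch.rand(B, 1, H, W), torch.rand(B, 3, H, W)
vel, reg = torch.rand(B, 2, H, W), torch.rand(B, 2, H, W)

preds = coder.decode(heat, rot_sine, rot_cosine, hei, dim, vel, reg=reg)
print(preds[0]['bboxes'].shape)  # (num_kept, 9): x, y, z, dx, dy, dz, yaw, vx, vy
```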
  {
    "path": "mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass DeltaXYZWLHRBBoxCoder(BaseBBoxCoder):\n    \"\"\"Bbox Coder for 3D boxes.\n\n    Args:\n        code_size (int): The dimension of boxes to be encoded.\n    \"\"\"\n\n    def __init__(self, code_size=7):\n        super(DeltaXYZWLHRBBoxCoder, self).__init__()\n        self.code_size = code_size\n\n    @staticmethod\n    def encode(src_boxes, dst_boxes):\n        \"\"\"Get box regression transformation deltas (dx, dy, dz, dx_size,\n        dy_size, dz_size, dr, dv*) that can be used to transform the\n        `src_boxes` into the `target_boxes`.\n\n        Args:\n            src_boxes (torch.Tensor): source boxes, e.g., object proposals.\n            dst_boxes (torch.Tensor): target of the transformation, e.g.,\n                ground-truth boxes.\n\n        Returns:\n            torch.Tensor: Box transformation deltas.\n        \"\"\"\n        box_ndim = src_boxes.shape[-1]\n        cas, cgs, cts = [], [], []\n        if box_ndim > 7:\n            xa, ya, za, wa, la, ha, ra, *cas = torch.split(\n                src_boxes, 1, dim=-1)\n            xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split(\n                dst_boxes, 1, dim=-1)\n            cts = [g - a for g, a in zip(cgs, cas)]\n        else:\n            xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1)\n            xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1)\n        za = za + ha / 2\n        zg = zg + hg / 2\n        diagonal = torch.sqrt(la**2 + wa**2)\n        xt = (xg - xa) / diagonal\n        yt = (yg - ya) / diagonal\n        zt = (zg - za) / ha\n        lt = torch.log(lg / la)\n        wt = torch.log(wg / wa)\n        ht = torch.log(hg / ha)\n        rt = rg - ra\n        return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)\n\n    @staticmethod\n    def decode(anchors, deltas):\n        \"\"\"Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size,\n        dz_size, dr, dv*) to `boxes`.\n\n        Args:\n            anchors (torch.Tensor): Parameters of anchors with shape (N, 7).\n            deltas (torch.Tensor): Encoded boxes with shape\n                (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*].\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        cas, cts = [], []\n        box_ndim = anchors.shape[-1]\n        if box_ndim > 7:\n            xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1)\n            xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1)\n        else:\n            xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)\n            xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1)\n\n        za = za + ha / 2\n        diagonal = torch.sqrt(la**2 + wa**2)\n        xg = xt * diagonal + xa\n        yg = yt * diagonal + ya\n        zg = zt * ha + za\n\n        lg = torch.exp(lt) * la\n        wg = torch.exp(wt) * wa\n        hg = torch.exp(ht) * ha\n        rg = rt + ra\n        zg = zg - hg / 2\n        cgs = [t + a for t, a in zip(cts, cas)]\n        return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1)\n"
  },
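`DeltaXYZWLHRBBoxCoder.decode` is the exact inverse of `encode`, which is easy to verify numerically. A short round-trip check (anchor and ground-truth values are arbitrary):

```python
import torch
from mmdet3d.core.bbox.coders import DeltaXYZWLHRBBoxCoder

coder = DeltaXYZWLHRBBoxCoder(code_size=7)
anchors = torch.tensor([[0.0, 0.0, -1.0, 1.6, 3.9, 1.56, 0.0]])
gt_boxes = torch.tensor([[0.5, 0.2, -0.9, 1.7, 4.1, 1.60, 0.1]])

deltas = coder.encode(anchors, gt_boxes)   # (1, 7) regression targets
decoded = coder.decode(anchors, deltas)    # apply the deltas back to the anchors
print(torch.allclose(decoded, gt_boxes, atol=1e-5))  # True
```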
  {
    "path": "mmdet3d/core/bbox/coders/fcos3d_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom ..structures import limit_period\n\n\n@BBOX_CODERS.register_module()\nclass FCOS3DBBoxCoder(BaseBBoxCoder):\n    \"\"\"Bounding box coder for FCOS3D.\n\n    Args:\n        base_depths (tuple[tuple[float]]): Depth references for decode box\n            depth. Defaults to None.\n        base_dims (tuple[tuple[float]]): Dimension references for decode box\n            dimension. Defaults to None.\n        code_size (int): The dimension of boxes to be encoded. Defaults to 7.\n        norm_on_bbox (bool): Whether to apply normalization on the bounding\n            box 2D attributes. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 base_depths=None,\n                 base_dims=None,\n                 code_size=7,\n                 norm_on_bbox=True):\n        super(FCOS3DBBoxCoder, self).__init__()\n        self.base_depths = base_depths\n        self.base_dims = base_dims\n        self.bbox_code_size = code_size\n        self.norm_on_bbox = norm_on_bbox\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels):\n        # TODO: refactor the encoder in the FCOS3D and PGD head\n        pass\n\n    def decode(self, bbox, scale, stride, training, cls_score=None):\n        \"\"\"Decode regressed results into 3D predictions.\n\n        Note that offsets are not transformed to the projected 3D centers.\n\n        Args:\n            bbox (torch.Tensor): Raw bounding box predictions in shape\n                [N, C, H, W].\n            scale (tuple[`Scale`]): Learnable scale parameters.\n            stride (int): Stride for a specific feature level.\n            training (bool): Whether the decoding is in the training\n                procedure.\n            cls_score (torch.Tensor): Classification score map for deciding\n                which base depth or dim is used. 
Defaults to None.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        # scale the bbox of different level\n        # only apply to offset, depth and size prediction\n        scale_offset, scale_depth, scale_size = scale[0:3]\n\n        clone_bbox = bbox.clone()\n        bbox[:, :2] = scale_offset(clone_bbox[:, :2]).float()\n        bbox[:, 2] = scale_depth(clone_bbox[:, 2]).float()\n        bbox[:, 3:6] = scale_size(clone_bbox[:, 3:6]).float()\n\n        if self.base_depths is None:\n            bbox[:, 2] = bbox[:, 2].exp()\n        elif len(self.base_depths) == 1:  # only single prior\n            mean = self.base_depths[0][0]\n            std = self.base_depths[0][1]\n            bbox[:, 2] = mean + bbox.clone()[:, 2] * std\n        else:  # multi-class priors\n            assert len(self.base_depths) == cls_score.shape[1], \\\n                'The number of multi-class depth priors should be equal to ' \\\n                'the number of categories.'\n            indices = cls_score.max(dim=1)[1]\n            depth_priors = cls_score.new_tensor(\n                self.base_depths)[indices, :].permute(0, 3, 1, 2)\n            mean = depth_priors[:, 0]\n            std = depth_priors[:, 1]\n            bbox[:, 2] = mean + bbox.clone()[:, 2] * std\n\n        bbox[:, 3:6] = bbox[:, 3:6].exp()\n        if self.base_dims is not None:\n            assert len(self.base_dims) == cls_score.shape[1], \\\n                'The number of anchor sizes should be equal to the number ' \\\n                'of categories.'\n            indices = cls_score.max(dim=1)[1]\n            size_priors = cls_score.new_tensor(\n                self.base_dims)[indices, :].permute(0, 3, 1, 2)\n            bbox[:, 3:6] = size_priors * bbox.clone()[:, 3:6]\n\n        assert self.norm_on_bbox is True, 'Setting norm_on_bbox to False '\\\n            'has not been thoroughly tested for FCOS3D.'\n        if self.norm_on_bbox:\n            if not training:\n                # Note that this line is conducted only when testing\n                bbox[:, :2] *= stride\n\n        return bbox\n\n    @staticmethod\n    def decode_yaw(bbox, centers2d, dir_cls, dir_offset, cam2img):\n        \"\"\"Decode yaw angle and change it from local to global.i.\n\n        Args:\n            bbox (torch.Tensor): Bounding box predictions in shape\n                [N, C] with yaws to be decoded.\n            centers2d (torch.Tensor): Projected 3D-center on the image planes\n                corresponding to the box predictions.\n            dir_cls (torch.Tensor): Predicted direction classes.\n            dir_offset (float): Direction offset before dividing all the\n                directions into several classes.\n            cam2img (torch.Tensor): Camera intrinsic matrix in shape [4, 4].\n\n        Returns:\n            torch.Tensor: Bounding boxes with decoded yaws.\n        \"\"\"\n        if bbox.shape[0] > 0:\n            dir_rot = limit_period(bbox[..., 6] - dir_offset, 0, np.pi)\n            bbox[..., 6] = \\\n                dir_rot + dir_offset + np.pi * dir_cls.to(bbox.dtype)\n\n        bbox[:, 6] = torch.atan2(centers2d[:, 0] - cam2img[0, 2],\n                                 cam2img[0, 0]) + bbox[:, 6]\n\n        return bbox\n"
  },
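`FCOS3DBBoxCoder.decode` rescales the raw per-pixel regression map in place: offsets, depth and size each get their own learnable `Scale`, depth is denormalized with `base_depths`, and sizes are exponentiated. A hedged sketch using a single depth prior and `mmcv`'s `Scale` module; all numbers are illustrative:

```python
import torch
from mmcv.cnn import Scale
from mmdet3d.core.bbox.coders import FCOS3DBBoxCoder

# One (mean, std) depth prior so cls_score is not required.
coder = FCOS3DBBoxCoder(base_depths=((28.0, 16.0), ), code_size=7)

# Raw regression map: 7 channels = offset(2) + depth(1) + size(3) + yaw(1).
bbox = torch.rand(2, 7, 4, 4)
scales = (Scale(1.0), Scale(1.0), Scale(1.0))

decoded = coder.decode(bbox, scales, stride=8, training=False)
print(decoded.shape)  # (2, 7, 4, 4); note decode modifies `bbox` in place
```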
  {
    "path": "mmdet3d/core/bbox/coders/groupfree3d_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass GroupFree3DBBoxCoder(PartialBinBasedBBoxCoder):\n    \"\"\"Modified partial bin based bbox coder for GroupFree3D.\n\n    Args:\n        num_dir_bins (int): Number of bins to encode direction angle.\n        num_sizes (int): Number of size clusters.\n        mean_sizes (list[list[int]]): Mean size of bboxes in each class.\n        with_rot (bool, optional): Whether the bbox is with rotation.\n            Defaults to True.\n        size_cls_agnostic (bool, optional): Whether the predicted size is\n            class-agnostic. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 num_dir_bins,\n                 num_sizes,\n                 mean_sizes,\n                 with_rot=True,\n                 size_cls_agnostic=True):\n        super(GroupFree3DBBoxCoder, self).__init__(\n            num_dir_bins=num_dir_bins,\n            num_sizes=num_sizes,\n            mean_sizes=mean_sizes,\n            with_rot=with_rot)\n        self.size_cls_agnostic = size_cls_agnostic\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes\n                with shape (n, 7).\n            gt_labels_3d (torch.Tensor): Ground truth classes.\n\n        Returns:\n            tuple: Targets of center, size and direction.\n        \"\"\"\n        # generate center target\n        center_target = gt_bboxes_3d.gravity_center\n\n        # generate bbox size target\n        size_target = gt_bboxes_3d.dims\n        size_class_target = gt_labels_3d\n        size_res_target = gt_bboxes_3d.dims - gt_bboxes_3d.tensor.new_tensor(\n            self.mean_sizes)[size_class_target]\n\n        # generate dir target\n        box_num = gt_labels_3d.shape[0]\n        if self.with_rot:\n            (dir_class_target,\n             dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)\n        else:\n            dir_class_target = gt_labels_3d.new_zeros(box_num)\n            dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)\n\n        return (center_target, size_target, size_class_target, size_res_target,\n                dir_class_target, dir_res_target)\n\n    def decode(self, bbox_out, prefix=''):\n        \"\"\"Decode predicted parts to bbox3d.\n\n        Args:\n            bbox_out (dict): Predictions from model, should contain keys below.\n\n                - center: predicted bottom center of bboxes.\n                - dir_class: predicted bbox direction class.\n                - dir_res: predicted bbox direction residual.\n                - size_class: predicted bbox size class.\n                - size_res: predicted bbox size residual.\n                - size: predicted class-agnostic bbox size\n            prefix (str, optional): Decode predictions with specific prefix.\n                Defaults to ''.\n\n        Returns:\n            torch.Tensor: Decoded bbox3d with shape (batch, n, 7).\n        \"\"\"\n        center = bbox_out[f'{prefix}center']\n        batch_size, num_proposal = center.shape[:2]\n\n        # decode heading angle\n        if self.with_rot:\n            dir_class = torch.argmax(bbox_out[f'{prefix}dir_class'], -1)\n            dir_res = torch.gather(bbox_out[f'{prefix}dir_res'], 2,\n       
                            dir_class.unsqueeze(-1))\n            dir_res.squeeze_(2)\n            dir_angle = self.class2angle(dir_class, dir_res).reshape(\n                batch_size, num_proposal, 1)\n        else:\n            dir_angle = center.new_zeros(batch_size, num_proposal, 1)\n\n        # decode bbox size\n        if self.size_cls_agnostic:\n            bbox_size = bbox_out[f'{prefix}size'].reshape(\n                batch_size, num_proposal, 3)\n        else:\n            size_class = torch.argmax(\n                bbox_out[f'{prefix}size_class'], -1, keepdim=True)\n            size_res = torch.gather(\n                bbox_out[f'{prefix}size_res'], 2,\n                size_class.unsqueeze(-1).repeat(1, 1, 1, 3))\n            mean_sizes = center.new_tensor(self.mean_sizes)\n            size_base = torch.index_select(mean_sizes, 0,\n                                           size_class.reshape(-1))\n            bbox_size = size_base.reshape(batch_size, num_proposal,\n                                          -1) + size_res.squeeze(2)\n\n        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)\n        return bbox3d\n\n    def split_pred(self, cls_preds, reg_preds, base_xyz, prefix=''):\n        \"\"\"Split predicted features to specific parts.\n\n        Args:\n            cls_preds (torch.Tensor): Class predicted features to split.\n            reg_preds (torch.Tensor): Regression predicted features to split.\n            base_xyz (torch.Tensor): Coordinates of points.\n            prefix (str, optional): Decode predictions with specific prefix.\n                Defaults to ''.\n\n        Returns:\n            dict[str, torch.Tensor]: Split results.\n        \"\"\"\n        results = {}\n        start, end = 0, 0\n\n        cls_preds_trans = cls_preds.transpose(2, 1)\n        reg_preds_trans = reg_preds.transpose(2, 1)\n\n        # decode center\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results[f'{prefix}center_residual'] = \\\n            reg_preds_trans[..., start:end].contiguous()\n        results[f'{prefix}center'] = base_xyz + \\\n            reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        # decode direction\n        end += self.num_dir_bins\n        results[f'{prefix}dir_class'] = \\\n            reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        end += self.num_dir_bins\n        dir_res_norm = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        results[f'{prefix}dir_res_norm'] = dir_res_norm\n        results[f'{prefix}dir_res'] = dir_res_norm * (\n            np.pi / self.num_dir_bins)\n\n        # decode size\n        if self.size_cls_agnostic:\n            end += 3\n            results[f'{prefix}size'] = \\\n                reg_preds_trans[..., start:end].contiguous()\n        else:\n            end += self.num_sizes\n            results[f'{prefix}size_class'] = reg_preds_trans[\n                ..., start:end].contiguous()\n            start = end\n\n            end += self.num_sizes * 3\n            size_res_norm = reg_preds_trans[..., start:end]\n            batch_size, num_proposal = reg_preds_trans.shape[:2]\n            size_res_norm = size_res_norm.view(\n                [batch_size, num_proposal, self.num_sizes, 3])\n            start = end\n\n            results[f'{prefix}size_res_norm'] = size_res_norm.contiguous()\n            mean_sizes = reg_preds.new_tensor(self.mean_sizes)\n            results[f'{prefix}size_res'] = (\n                
size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))\n\n        # decode objectness score\n        # Group-Free-3D objectness output shape (batch, proposal, 1)\n        results[f'{prefix}obj_scores'] = cls_preds_trans[..., :1].contiguous()\n\n        # decode semantic score\n        results[f'{prefix}sem_scores'] = cls_preds_trans[..., 1:].contiguous()\n\n        return results\n"
  },
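`GroupFree3DBBoxCoder.decode` works like the anchor-free coder above, except every key is looked up with a stage prefix. A sketch with a made-up `'last_'` prefix and class-agnostic sizes (1 sample, 8 proposals, 12 direction bins):

```python
import torch
from mmdet3d.core.bbox.coders import GroupFree3DBBoxCoder

coder = GroupFree3DBBoxCoder(
    num_dir_bins=12,
    num_sizes=18,
    mean_sizes=[[0.5, 0.5, 0.5]] * 18,  # placeholder class mean sizes
    with_rot=True,
    size_cls_agnostic=True)

# Fake predictions of one decoder stage, keyed by an illustrative prefix.
bbox_out = {
    'last_center': torch.rand(1, 8, 3),
    'last_dir_class': torch.rand(1, 8, 12),
    'last_dir_res': torch.rand(1, 8, 12),
    'last_size': torch.rand(1, 8, 3),
}
bbox3d = coder.decode(bbox_out, prefix='last_')
print(bbox3d.shape)  # (1, 8, 7)
```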
  {
    "path": "mmdet3d/core/bbox/coders/monoflex_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom torch.nn import functional as F\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass MonoFlexCoder(BaseBBoxCoder):\n    \"\"\"Bbox Coder for MonoFlex.\n\n    Args:\n        depth_mode (str): The mode for depth calculation.\n            Available options are \"linear\", \"inv_sigmoid\", and \"exp\".\n        base_depth (tuple[float]): References for decoding box depth.\n        depth_range (list): Depth range of predicted depth.\n        combine_depth (bool): Whether to use combined depth (direct depth\n            and depth from keypoints) or use direct depth only.\n        uncertainty_range (list): Uncertainty range of predicted depth.\n        base_dims (tuple[tuple[float]]): Dimensions mean and std of decode bbox\n            dimensions [l, h, w] for each category.\n        dims_mode (str): The mode for dimension calculation.\n            Available options are \"linear\" and \"exp\".\n        multibin (bool): Whether to use multibin representation.\n        num_dir_bins (int): Number of Number of bins to encode\n            direction angle.\n        bin_centers (list[float]): Local yaw centers while using multibin\n            representations.\n        bin_margin (float): Margin of multibin representations.\n        code_size (int): The dimension of boxes to be encoded.\n        eps (float, optional): A value added to the denominator for numerical\n            stability. Default 1e-3.\n    \"\"\"\n\n    def __init__(self,\n                 depth_mode,\n                 base_depth,\n                 depth_range,\n                 combine_depth,\n                 uncertainty_range,\n                 base_dims,\n                 dims_mode,\n                 multibin,\n                 num_dir_bins,\n                 bin_centers,\n                 bin_margin,\n                 code_size,\n                 eps=1e-3):\n        super(MonoFlexCoder, self).__init__()\n\n        # depth related\n        self.depth_mode = depth_mode\n        self.base_depth = base_depth\n        self.depth_range = depth_range\n        self.combine_depth = combine_depth\n        self.uncertainty_range = uncertainty_range\n\n        # dimensions related\n        self.base_dims = base_dims\n        self.dims_mode = dims_mode\n\n        # orientation related\n        self.multibin = multibin\n        self.num_dir_bins = num_dir_bins\n        self.bin_centers = bin_centers\n        self.bin_margin = bin_margin\n\n        # output related\n        self.bbox_code_size = code_size\n        self.eps = eps\n\n    def encode(self, gt_bboxes_3d):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (`BaseInstance3DBoxes`): Ground truth 3D bboxes.\n                shape: (N, 7).\n\n        Returns:\n            torch.Tensor: Targets of orientations.\n        \"\"\"\n        local_yaw = gt_bboxes_3d.local_yaw\n        # encode local yaw (-pi ~ pi) to multibin format\n        encode_local_yaw = local_yaw.new_zeros(\n            [local_yaw.shape[0], self.num_dir_bins * 2])\n        bin_size = 2 * np.pi / self.num_dir_bins\n        margin_size = bin_size * self.bin_margin\n\n        bin_centers = local_yaw.new_tensor(self.bin_centers)\n        range_size = bin_size / 2 + margin_size\n\n        offsets = local_yaw.unsqueeze(1) - bin_centers.unsqueeze(0)\n        offsets[offsets > np.pi] = 
offsets[offsets > np.pi] - 2 * np.pi\n        offsets[offsets < -np.pi] = offsets[offsets < -np.pi] + 2 * np.pi\n\n        for i in range(self.num_dir_bins):\n            offset = offsets[:, i]\n            inds = abs(offset) < range_size\n            encode_local_yaw[inds, i] = 1\n            encode_local_yaw[inds, i + self.num_dir_bins] = offset[inds]\n\n        orientation_target = encode_local_yaw\n\n        return orientation_target\n\n    def decode(self, bbox, base_centers2d, labels, downsample_ratio, cam2imgs):\n        \"\"\"Decode bounding box regression into 3D predictions.\n\n        Args:\n            bbox (Tensor): Raw bounding box predictions for each\n                predict center2d point.\n                shape: (N, C)\n            base_centers2d (torch.Tensor): Base centers2d for 3D bboxes.\n                shape: (N, 2).\n            labels (Tensor): Batch predict class label for each predict\n                center2d point.\n                shape: (N, )\n            downsample_ratio (int): The stride of feature map.\n            cam2imgs (Tensor): Batch images' camera intrinsic matrix.\n                shape: kitti (N, 4, 4)  nuscenes (N, 3, 3)\n\n        Return:\n            dict: The 3D prediction dict decoded from regression map.\n            the dict has components below:\n                - bboxes2d (torch.Tensor): Decoded [x1, y1, x2, y2] format\n                    2D bboxes.\n                - dimensions (torch.Tensor): Decoded dimensions for each\n                    object.\n                - offsets2d (torch.Tenosr): Offsets between base centers2d\n                    and real centers2d.\n                - direct_depth (torch.Tensor): Decoded directly regressed\n                    depth.\n                - keypoints2d (torch.Tensor): Keypoints of each projected\n                    3D box on image.\n                - keypoints_depth (torch.Tensor): Decoded depth from keypoints.\n                - combined_depth (torch.Tensor): Combined depth using direct\n                    depth and keypoints depth with depth uncertainty.\n                - orientations (torch.Tensor): Multibin format orientations\n                    (local yaw) for each objects.\n        \"\"\"\n\n        # 4 dimensions for FCOS style regression\n        pred_bboxes2d = bbox[:, 0:4]\n\n        # change FCOS style to [x1, y1, x2, y2] format for IOU Loss\n        pred_bboxes2d = self.decode_bboxes2d(pred_bboxes2d, base_centers2d)\n\n        # 2 dimensions for projected centers2d offsets\n        pred_offsets2d = bbox[:, 4:6]\n\n        # 3 dimensions for 3D bbox dimensions offsets\n        pred_dimensions_offsets3d = bbox[:, 29:32]\n\n        # the first 8 dimensions are for orientation bin classification\n        # and the second 8 dimensions are for orientation offsets.\n        pred_orientations = torch.cat((bbox[:, 32:40], bbox[:, 40:48]), dim=1)\n\n        # 3 dimensions for the uncertainties of the solved depths from\n        # groups of keypoints\n        pred_keypoints_depth_uncertainty = bbox[:, 26:29]\n\n        # 1 dimension for the uncertainty of directly regressed depth\n        pred_direct_depth_uncertainty = bbox[:, 49:50].squeeze(-1)\n\n        # 2 dimension of offsets x keypoints (8 corners + top/bottom center)\n        pred_keypoints2d = bbox[:, 6:26].reshape(-1, 10, 2)\n\n        # 1 dimension for depth offsets\n        pred_direct_depth_offsets = bbox[:, 48:49].squeeze(-1)\n\n        # decode the pred residual dimensions to real dimensions\n        pred_dimensions = 
self.decode_dims(labels, pred_dimensions_offsets3d)\n        pred_direct_depth = self.decode_direct_depth(pred_direct_depth_offsets)\n        pred_keypoints_depth = self.keypoints2depth(pred_keypoints2d,\n                                                    pred_dimensions, cam2imgs,\n                                                    downsample_ratio)\n\n        pred_direct_depth_uncertainty = torch.clamp(\n            pred_direct_depth_uncertainty, self.uncertainty_range[0],\n            self.uncertainty_range[1])\n        pred_keypoints_depth_uncertainty = torch.clamp(\n            pred_keypoints_depth_uncertainty, self.uncertainty_range[0],\n            self.uncertainty_range[1])\n\n        if self.combine_depth:\n            pred_depth_uncertainty = torch.cat(\n                (pred_direct_depth_uncertainty.unsqueeze(-1),\n                 pred_keypoints_depth_uncertainty),\n                dim=1).exp()\n            pred_depth = torch.cat(\n                (pred_direct_depth.unsqueeze(-1), pred_keypoints_depth), dim=1)\n            pred_combined_depth = \\\n                self.combine_depths(pred_depth, pred_depth_uncertainty)\n        else:\n            pred_combined_depth = None\n\n        preds = dict(\n            bboxes2d=pred_bboxes2d,\n            dimensions=pred_dimensions,\n            offsets2d=pred_offsets2d,\n            keypoints2d=pred_keypoints2d,\n            orientations=pred_orientations,\n            direct_depth=pred_direct_depth,\n            keypoints_depth=pred_keypoints_depth,\n            combined_depth=pred_combined_depth,\n            direct_depth_uncertainty=pred_direct_depth_uncertainty,\n            keypoints_depth_uncertainty=pred_keypoints_depth_uncertainty,\n        )\n\n        return preds\n\n    def decode_direct_depth(self, depth_offsets):\n        \"\"\"Transform depth offset to directly regressed depth.\n\n        Args:\n            depth_offsets (torch.Tensor): Predicted depth offsets.\n                shape: (N, )\n\n        Return:\n            torch.Tensor: Directly regressed depth.\n                shape: (N, )\n        \"\"\"\n        if self.depth_mode == 'exp':\n            direct_depth = depth_offsets.exp()\n        elif self.depth_mode == 'linear':\n            base_depth = depth_offsets.new_tensor(self.base_depth)\n            direct_depth = depth_offsets * base_depth[1] + base_depth[0]\n        elif self.depth_mode == 'inv_sigmoid':\n            direct_depth = 1 / torch.sigmoid(depth_offsets) - 1\n        else:\n            raise ValueError\n\n        if self.depth_range is not None:\n            direct_depth = torch.clamp(\n                direct_depth, min=self.depth_range[0], max=self.depth_range[1])\n\n        return direct_depth\n\n    def decode_location(self,\n                        base_centers2d,\n                        offsets2d,\n                        depths,\n                        cam2imgs,\n                        downsample_ratio,\n                        pad_mode='default'):\n        \"\"\"Retrieve object location.\n\n        Args:\n            base_centers2d (torch.Tensor): predicted base centers2d.\n                shape: (N, 2)\n            offsets2d (torch.Tensor): The offsets between real centers2d\n                and base centers2d.\n                shape: (N , 2)\n            depths (torch.Tensor): Depths of objects.\n                shape: (N, )\n            cam2imgs (torch.Tensor): Batch images' camera intrinsic matrix.\n                shape: kitti (N, 4, 4)  nuscenes (N, 3, 3)\n            
downsample_ratio (int): The stride of feature map.\n            pad_mode (str, optional): Padding mode used in\n                training data augmentation.\n\n        Return:\n            tuple(torch.Tensor): Centers of 3D boxes.\n                shape: (N, 3)\n        \"\"\"\n        N = cam2imgs.shape[0]\n        # (N, 4, 4)\n        cam2imgs_inv = cam2imgs.inverse()\n        if pad_mode == 'default':\n            centers2d_img = (base_centers2d + offsets2d) * downsample_ratio\n        else:\n            raise NotImplementedError\n        # (N, 3)\n        centers2d_img = \\\n            torch.cat((centers2d_img, depths.unsqueeze(-1)), dim=1)\n        # (N, 4, 1)\n        centers2d_extend = \\\n            torch.cat((centers2d_img, centers2d_img.new_ones(N, 1)),\n                      dim=1).unsqueeze(-1)\n        locations = torch.matmul(cam2imgs_inv, centers2d_extend).squeeze(-1)\n\n        return locations[:, :3]\n\n    def keypoints2depth(self,\n                        keypoints2d,\n                        dimensions,\n                        cam2imgs,\n                        downsample_ratio=4,\n                        group0_index=[(7, 3), (0, 4)],\n                        group1_index=[(2, 6), (1, 5)]):\n        \"\"\"Decode depth form three groups of keypoints and geometry projection\n        model. 2D keypoints inlucding 8 coreners and top/bottom centers will be\n        divided into three groups which will be used to calculate three depths\n        of object.\n\n        .. code-block:: none\n\n                Group center keypoints:\n\n                             + --------------- +\n                            /|   top center   /|\n                           / |      .        / |\n                          /  |      |       /  |\n                         + ---------|----- +   +\n                         |  /       |      |  /\n                         | /        .      
| /\n                         |/ bottom center  |/\n                         + --------------- +\n\n                Group 0 keypoints:\n\n                             0\n                             + -------------- +\n                            /|               /|\n                           / |              / |\n                          /  |            5/  |\n                         + -------------- +   +\n                         |  /3            |  /\n                         | /              | /\n                         |/               |/\n                         + -------------- + 6\n\n                Group 1 keypoints:\n\n                                               4\n                             + -------------- +\n                            /|               /|\n                           / |              / |\n                          /  |             /  |\n                       1 + -------------- +   + 7\n                         |  /             |  /\n                         | /              | /\n                         |/               |/\n                       2 + -------------- +\n\n\n        Args:\n            keypoints2d (torch.Tensor): Keypoints of objects.\n                8 vertices + top/bottom center.\n                shape: (N, 10, 2)\n            dimensions (torch.Tensor): Dimensions of objetcts.\n                shape: (N, 3)\n            cam2imgs (torch.Tensor): Batch images' camera intrinsic matrix.\n                shape: kitti (N, 4, 4)  nuscenes (N, 3, 3)\n            downsample_ratio (int, opitonal): The stride of feature map.\n                Defaults: 4.\n            group0_index(list[tuple[int]], optional): Keypoints group 0\n                of index to calculate the depth.\n                Defaults: [0, 3, 4, 7].\n            group1_index(list[tuple[int]], optional): Keypoints group 1\n                of index to calculate the depth.\n                Defaults: [1, 2, 5, 6]\n\n        Return:\n            tuple(torch.Tensor): Depth computed from three groups of\n                keypoints (top/bottom, group0, group1)\n                shape: (N, 3)\n        \"\"\"\n\n        pred_height_3d = dimensions[:, 1].clone()\n        f_u = cam2imgs[:, 0, 0]\n        center_height = keypoints2d[:, -2, 1] - keypoints2d[:, -1, 1]\n        corner_group0_height = keypoints2d[:, group0_index[0], 1] \\\n            - keypoints2d[:, group0_index[1], 1]\n        corner_group1_height = keypoints2d[:, group1_index[0], 1] \\\n            - keypoints2d[:, group1_index[1], 1]\n        center_depth = f_u * pred_height_3d / (\n            F.relu(center_height) * downsample_ratio + self.eps)\n        corner_group0_depth = (f_u * pred_height_3d).unsqueeze(-1) / (\n            F.relu(corner_group0_height) * downsample_ratio + self.eps)\n        corner_group1_depth = (f_u * pred_height_3d).unsqueeze(-1) / (\n            F.relu(corner_group1_height) * downsample_ratio + self.eps)\n\n        corner_group0_depth = corner_group0_depth.mean(dim=1)\n        corner_group1_depth = corner_group1_depth.mean(dim=1)\n\n        keypoints_depth = torch.stack(\n            (center_depth, corner_group0_depth, corner_group1_depth), dim=1)\n        keypoints_depth = torch.clamp(\n            keypoints_depth, min=self.depth_range[0], max=self.depth_range[1])\n\n        return keypoints_depth\n\n    def decode_dims(self, labels, dims_offset):\n        \"\"\"Retrieve object dimensions.\n\n        Args:\n            labels (torch.Tensor): Each points' category id.\n                shape: (N, 
K)\n            dims_offset (torch.Tensor): Dimension offsets.\n                shape: (N, 3)\n\n        Returns:\n            torch.Tensor: Shape (N, 3)\n        \"\"\"\n\n        if self.dims_mode == 'exp':\n            dims_offset = dims_offset.exp()\n        elif self.dims_mode == 'linear':\n            labels = labels.long()\n            base_dims = dims_offset.new_tensor(self.base_dims)\n            dims_mean = base_dims[:, :3]\n            dims_std = base_dims[:, 3:6]\n            cls_dimension_mean = dims_mean[labels, :]\n            cls_dimension_std = dims_std[labels, :]\n            dimensions = dims_offset * cls_dimension_mean + cls_dimension_std\n        else:\n            raise ValueError\n\n        return dimensions\n\n    def decode_orientation(self, ori_vector, locations):\n        \"\"\"Retrieve object orientation.\n\n        Args:\n            ori_vector (torch.Tensor): Local orientation vector\n                in [axis_cls, head_cls, sin, cos] format.\n                shape: (N, num_dir_bins * 4)\n            locations (torch.Tensor): Object location.\n                shape: (N, 3)\n\n        Returns:\n            tuple[torch.Tensor]: yaws and local yaws of 3d bboxes.\n        \"\"\"\n        if self.multibin:\n            pred_bin_cls = ori_vector[:, :self.num_dir_bins * 2].view(\n                -1, self.num_dir_bins, 2)\n            pred_bin_cls = pred_bin_cls.softmax(dim=2)[..., 1]\n            orientations = ori_vector.new_zeros(ori_vector.shape[0])\n            for i in range(self.num_dir_bins):\n                mask_i = (pred_bin_cls.argmax(dim=1) == i)\n                start_bin = self.num_dir_bins * 2 + i * 2\n                end_bin = start_bin + 2\n                pred_bin_offset = ori_vector[mask_i, start_bin:end_bin]\n                orientations[mask_i] = pred_bin_offset[:, 0].atan2(\n                    pred_bin_offset[:, 1]) + self.bin_centers[i]\n        else:\n            axis_cls = ori_vector[:, :2].softmax(dim=1)\n            axis_cls = axis_cls[:, 0] < axis_cls[:, 1]\n            head_cls = ori_vector[:, 2:4].softmax(dim=1)\n            head_cls = head_cls[:, 0] < head_cls[:, 1]\n            # cls axis\n            orientations = self.bin_centers[axis_cls + head_cls * 2]\n            sin_cos_offset = F.normalize(ori_vector[:, 4:])\n            orientations += sin_cos_offset[:, 0].atan2(sin_cos_offset[:, 1])\n\n        locations = locations.view(-1, 3)\n        rays = locations[:, 0].atan2(locations[:, 2])\n        local_yaws = orientations\n        yaws = local_yaws + rays\n\n        larger_idx = (yaws > np.pi).nonzero(as_tuple=False)\n        small_idx = (yaws < -np.pi).nonzero(as_tuple=False)\n        if len(larger_idx) != 0:\n            yaws[larger_idx] -= 2 * np.pi\n        if len(small_idx) != 0:\n            yaws[small_idx] += 2 * np.pi\n\n        larger_idx = (local_yaws > np.pi).nonzero(as_tuple=False)\n        small_idx = (local_yaws < -np.pi).nonzero(as_tuple=False)\n        if len(larger_idx) != 0:\n            local_yaws[larger_idx] -= 2 * np.pi\n        if len(small_idx) != 0:\n            local_yaws[small_idx] += 2 * np.pi\n\n        return yaws, local_yaws\n\n    def decode_bboxes2d(self, reg_bboxes2d, base_centers2d):\n        \"\"\"Retrieve [x1, y1, x2, y2] format 2D bboxes.\n\n        Args:\n            reg_bboxes2d (torch.Tensor): Predicted FCOS style\n                2D bboxes.\n                shape: (N, 4)\n            base_centers2d (torch.Tensor): predicted base centers2d.\n                shape: (N, 2)\n\n        Returns:\n            torch.Tensor: [x1, y1, x2, y2] format 2D bboxes.\n        \"\"\"\n        centers_x = base_centers2d[:, 0]\n        centers_y = base_centers2d[:, 1]\n\n        xs_min = centers_x - reg_bboxes2d[..., 0]\n        ys_min = centers_y - reg_bboxes2d[..., 1]\n        xs_max = centers_x + reg_bboxes2d[..., 2]\n        ys_max = centers_y + reg_bboxes2d[..., 3]\n\n        bboxes2d = torch.stack([xs_min, ys_min, xs_max, ys_max], dim=-1)\n\n        return bboxes2d\n\n    def combine_depths(self, depth, depth_uncertainty):\n        \"\"\"Combine all the predicted depths with depth uncertainty.\n\n        Args:\n            depth (torch.Tensor): Predicted depths of each object.\n                shape: (N, 4)\n            depth_uncertainty (torch.Tensor): Depth uncertainty for\n                each depth of each object.\n                shape: (N, 4)\n\n        Returns:\n            torch.Tensor: combined depth.\n        \"\"\"\n        uncertainty_weights = 1 / depth_uncertainty\n        uncertainty_weights = \\\n            uncertainty_weights / \\\n            uncertainty_weights.sum(dim=1, keepdim=True)\n        combined_depth = torch.sum(depth * uncertainty_weights, dim=1)\n\n        return combined_depth\n"
  },
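The `combine_depths` step above fuses the directly regressed depth with the three keypoint-derived depths by inverse-uncertainty weighting. A minimal standalone sketch of that fusion with toy tensors (the values are illustrative and not tied to the mmdet3d registry):

```python
import torch

# toy example: 2 objects, 4 depth estimates each
# (1 direct depth + 3 keypoint-group depths), as assembled in MonoFlexCoder.decode
depth = torch.tensor([[28.0, 30.0, 29.0, 31.0],
                      [10.0, 12.0, 11.0, 50.0]])
# exp() of the clamped log-uncertainties predicted by the head
depth_uncertainty = torch.tensor([[1.0, 2.0, 2.0, 2.0],
                                  [1.0, 1.0, 1.0, 10.0]])

# inverse-uncertainty weights, normalized per object
weights = 1 / depth_uncertainty
weights = weights / weights.sum(dim=1, keepdim=True)
combined_depth = torch.sum(depth * weights, dim=1)
print(combined_depth)  # low-uncertainty estimates dominate the fused depth
```

The second object shows the intent of the scheme: the outlier estimate (50 m) carries a large uncertainty, so it barely moves the combined depth.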
  {
    "path": "mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass PartialBinBasedBBoxCoder(BaseBBoxCoder):\n    \"\"\"Partial bin based bbox coder.\n\n    Args:\n        num_dir_bins (int): Number of bins to encode direction angle.\n        num_sizes (int): Number of size clusters.\n        mean_sizes (list[list[int]]): Mean size of bboxes in each class.\n        with_rot (bool): Whether the bbox is with rotation.\n    \"\"\"\n\n    def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):\n        super(PartialBinBasedBBoxCoder, self).__init__()\n        assert len(mean_sizes) == num_sizes\n        self.num_dir_bins = num_dir_bins\n        self.num_sizes = num_sizes\n        self.mean_sizes = mean_sizes\n        self.with_rot = with_rot\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes\n                with shape (n, 7).\n            gt_labels_3d (torch.Tensor): Ground truth classes.\n\n        Returns:\n            tuple: Targets of center, size and direction.\n        \"\"\"\n        # generate center target\n        center_target = gt_bboxes_3d.gravity_center\n\n        # generate bbox size target\n        size_class_target = gt_labels_3d\n        size_res_target = gt_bboxes_3d.dims - gt_bboxes_3d.tensor.new_tensor(\n            self.mean_sizes)[size_class_target]\n\n        # generate dir target\n        box_num = gt_labels_3d.shape[0]\n        if self.with_rot:\n            (dir_class_target,\n             dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)\n        else:\n            dir_class_target = gt_labels_3d.new_zeros(box_num)\n            dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)\n\n        return (center_target, size_class_target, size_res_target,\n                dir_class_target, dir_res_target)\n\n    def decode(self, bbox_out, suffix=''):\n        \"\"\"Decode predicted parts to bbox3d.\n\n        Args:\n            bbox_out (dict): Predictions from model, should contain keys below.\n\n                - center: predicted bottom center of bboxes.\n                - dir_class: predicted bbox direction class.\n                - dir_res: predicted bbox direction residual.\n                - size_class: predicted bbox size class.\n                - size_res: predicted bbox size residual.\n            suffix (str): Decode predictions with specific suffix.\n\n        Returns:\n            torch.Tensor: Decoded bbox3d with shape (batch, n, 7).\n        \"\"\"\n        center = bbox_out['center' + suffix]\n        batch_size, num_proposal = center.shape[:2]\n\n        # decode heading angle\n        if self.with_rot:\n            dir_class = torch.argmax(bbox_out['dir_class' + suffix], -1)\n            dir_res = torch.gather(bbox_out['dir_res' + suffix], 2,\n                                   dir_class.unsqueeze(-1))\n            dir_res.squeeze_(2)\n            dir_angle = self.class2angle(dir_class, dir_res).reshape(\n                batch_size, num_proposal, 1)\n        else:\n            dir_angle = center.new_zeros(batch_size, num_proposal, 1)\n\n        # decode bbox size\n        size_class = torch.argmax(\n            bbox_out['size_class' + suffix], -1, keepdim=True)\n        size_res = torch.gather(bbox_out['size_res' + suffix], 
2,\n                                size_class.unsqueeze(-1).repeat(1, 1, 1, 3))\n        mean_sizes = center.new_tensor(self.mean_sizes)\n        size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))\n        bbox_size = size_base.reshape(batch_size, num_proposal,\n                                      -1) + size_res.squeeze(2)\n\n        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)\n        return bbox3d\n\n    def decode_corners(self, center, size_res, size_class):\n        \"\"\"Decode center, size residuals and class to corners. Only useful for\n        axis-aligned bounding boxes, so angle isn't considered.\n\n        Args:\n            center (torch.Tensor): Shape [B, N, 3]\n            size_res (torch.Tensor): Shape [B, N, 3] or [B, N, C, 3]\n            size_class (torch.Tensor): Shape: [B, N] or [B, N, 1]\n            or [B, N, C, 3]\n\n        Returns:\n            torch.Tensor: Corners with shape [B, N, 6]\n        \"\"\"\n        if len(size_class.shape) == 2 or size_class.shape[-1] == 1:\n            batch_size, proposal_num = size_class.shape[:2]\n            one_hot_size_class = size_res.new_zeros(\n                (batch_size, proposal_num, self.num_sizes))\n            if len(size_class.shape) == 2:\n                size_class = size_class.unsqueeze(-1)\n            one_hot_size_class.scatter_(2, size_class, 1)\n            one_hot_size_class_expand = one_hot_size_class.unsqueeze(\n                -1).repeat(1, 1, 1, 3).contiguous()\n        else:\n            one_hot_size_class_expand = size_class\n\n        if len(size_res.shape) == 4:\n            size_res = torch.sum(size_res * one_hot_size_class_expand, 2)\n\n        mean_sizes = size_res.new_tensor(self.mean_sizes)\n        mean_sizes = torch.sum(mean_sizes * one_hot_size_class_expand, 2)\n        size_full = (size_res + 1) * mean_sizes\n        size_full = torch.clamp(size_full, 0)\n        half_size_full = size_full / 2\n        corner1 = center - half_size_full\n        corner2 = center + half_size_full\n        corners = torch.cat([corner1, corner2], dim=-1)\n        return corners\n\n    def split_pred(self, cls_preds, reg_preds, base_xyz):\n        \"\"\"Split predicted features to specific parts.\n\n        Args:\n            cls_preds (torch.Tensor): Class predicted features to split.\n            reg_preds (torch.Tensor): Regression predicted features to split.\n            base_xyz (torch.Tensor): Coordinates of points.\n\n        Returns:\n            dict[str, torch.Tensor]: Split results.\n        \"\"\"\n        results = {}\n        start, end = 0, 0\n\n        cls_preds_trans = cls_preds.transpose(2, 1)\n        reg_preds_trans = reg_preds.transpose(2, 1)\n\n        # decode center\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results['center'] = base_xyz + \\\n            reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        # decode direction\n        end += self.num_dir_bins\n        results['dir_class'] = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        end += self.num_dir_bins\n        dir_res_norm = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        results['dir_res_norm'] = dir_res_norm\n        results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)\n\n        # decode size\n        end += self.num_sizes\n        results['size_class'] = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        end += self.num_sizes * 3\n        
size_res_norm = reg_preds_trans[..., start:end]\n        batch_size, num_proposal = reg_preds_trans.shape[:2]\n        size_res_norm = size_res_norm.view(\n            [batch_size, num_proposal, self.num_sizes, 3])\n        start = end\n\n        results['size_res_norm'] = size_res_norm.contiguous()\n        mean_sizes = reg_preds.new_tensor(self.mean_sizes)\n        results['size_res'] = (\n            size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))\n\n        # decode objectness score\n        start = 0\n        end = 2\n        results['obj_scores'] = cls_preds_trans[..., start:end].contiguous()\n        start = end\n\n        # decode semantic score\n        results['sem_scores'] = cls_preds_trans[..., start:].contiguous()\n\n        return results\n\n    def angle2class(self, angle):\n        \"\"\"Convert continuous angle to a discrete class and a residual.\n\n        Convert continuous angle to a discrete class and a small\n        regression number from class center angle to current angle.\n\n        Args:\n            angle (torch.Tensor): Angle is from 0-2pi (or -pi~pi),\n                class center at 0, 1*(2pi/N), 2*(2pi/N) ...  (N-1)*(2pi/N).\n\n        Returns:\n            tuple: Encoded discrete class and residual.\n        \"\"\"\n        angle = angle % (2 * np.pi)\n        angle_per_class = 2 * np.pi / float(self.num_dir_bins)\n        shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)\n        angle_cls = shifted_angle // angle_per_class\n        angle_res = shifted_angle - (\n            angle_cls * angle_per_class + angle_per_class / 2)\n        return angle_cls.long(), angle_res\n\n    def class2angle(self, angle_cls, angle_res, limit_period=True):\n        \"\"\"Inverse function to angle2class.\n\n        Args:\n            angle_cls (torch.Tensor): Angle class to decode.\n            angle_res (torch.Tensor): Angle residual to decode.\n            limit_period (bool): Whether to limit angle to [-pi, pi].\n\n        Returns:\n            torch.Tensor: Angle decoded from angle_cls and angle_res.\n        \"\"\"\n        angle_per_class = 2 * np.pi / float(self.num_dir_bins)\n        angle_center = angle_cls.float() * angle_per_class\n        angle = angle_center + angle_res\n        if limit_period:\n            angle[angle > np.pi] -= 2 * np.pi\n        return angle\n"
  },
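For reference, the `angle2class`/`class2angle` pair above is a plain bin-plus-residual encoding of yaw. A small self-contained sketch of the same arithmetic (the bin count of 12 is illustrative; in practice it comes from the config):

```python
import numpy as np
import torch

num_dir_bins = 12  # illustrative value
angle_per_class = 2 * np.pi / num_dir_bins

yaw = torch.tensor([0.1, 1.9, -2.5])

# encode: shift by half a bin so each class is centered on k * angle_per_class
angle = yaw % (2 * np.pi)
shifted = (angle + angle_per_class / 2) % (2 * np.pi)
angle_cls = (shifted // angle_per_class).long()
angle_res = shifted - (angle_cls * angle_per_class + angle_per_class / 2)

# decode: class center plus residual, wrapped back to [-pi, pi]
decoded = angle_cls.float() * angle_per_class + angle_res
decoded[decoded > np.pi] -= 2 * np.pi
print(torch.allclose(decoded, yaw))  # True (up to the wrap)
```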
  {
    "path": "mmdet3d/core/bbox/coders/pgd_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom torch.nn import functional as F\n\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom .fcos3d_bbox_coder import FCOS3DBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass PGDBBoxCoder(FCOS3DBBoxCoder):\n    \"\"\"Bounding box coder for PGD.\"\"\"\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels):\n        # TODO: refactor the encoder codes in the FCOS3D and PGD head\n        pass\n\n    def decode_2d(self,\n                  bbox,\n                  scale,\n                  stride,\n                  max_regress_range,\n                  training,\n                  pred_keypoints=False,\n                  pred_bbox2d=True):\n        \"\"\"Decode regressed 2D attributes.\n\n        Args:\n            bbox (torch.Tensor): Raw bounding box predictions in shape\n                [N, C, H, W].\n            scale (tuple[`Scale`]): Learnable scale parameters.\n            stride (int): Stride for a specific feature level.\n            max_regress_range (int): Maximum regression range for a specific\n                feature level.\n            training (bool): Whether the decoding is in the training\n                procedure.\n            pred_keypoints (bool, optional): Whether to predict keypoints.\n                Defaults to False.\n            pred_bbox2d (bool, optional): Whether to predict 2D bounding\n                boxes. Defaults to False.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        clone_bbox = bbox.clone()\n        if pred_keypoints:\n            scale_kpts = scale[3]\n            # 2 dimension of offsets x 8 corners of a 3D bbox\n            bbox[:, self.bbox_code_size:self.bbox_code_size + 16] = \\\n                torch.tanh(scale_kpts(clone_bbox[\n                    :, self.bbox_code_size:self.bbox_code_size + 16]).float())\n\n        if pred_bbox2d:\n            scale_bbox2d = scale[-1]\n            # The last four dimensions are offsets to four sides of a 2D bbox\n            bbox[:, -4:] = scale_bbox2d(clone_bbox[:, -4:]).float()\n\n        if self.norm_on_bbox:\n            if pred_bbox2d:\n                bbox[:, -4:] = F.relu(bbox.clone()[:, -4:])\n            if not training:\n                if pred_keypoints:\n                    bbox[\n                        :, self.bbox_code_size:self.bbox_code_size + 16] *= \\\n                           max_regress_range\n                if pred_bbox2d:\n                    bbox[:, -4:] *= stride\n        else:\n            if pred_bbox2d:\n                bbox[:, -4:] = bbox.clone()[:, -4:].exp()\n        return bbox\n\n    def decode_prob_depth(self, depth_cls_preds, depth_range, depth_unit,\n                          division, num_depth_cls):\n        \"\"\"Decode probabilistic depth map.\n\n        Args:\n            depth_cls_preds (torch.Tensor): Depth probabilistic map in shape\n                [..., self.num_depth_cls] (raw output before softmax).\n            depth_range (tuple[float]): Range of depth estimation.\n            depth_unit (int): Unit of depth range division.\n            division (str): Depth division method. 
Options include 'uniform',\n                'linear', 'log', 'loguniform'.\n            num_depth_cls (int): Number of depth classes.\n\n        Returns:\n            torch.Tensor: Decoded probabilistic depth estimation.\n        \"\"\"\n        if division == 'uniform':\n            depth_multiplier = depth_unit * \\\n                depth_cls_preds.new_tensor(\n                    list(range(num_depth_cls))).reshape([1, -1])\n            prob_depth_preds = (F.softmax(depth_cls_preds.clone(), dim=-1) *\n                                depth_multiplier).sum(dim=-1)\n            return prob_depth_preds\n        elif division == 'linear':\n            split_pts = depth_cls_preds.new_tensor(list(\n                range(num_depth_cls))).reshape([1, -1])\n            depth_multiplier = depth_range[0] + (\n                depth_range[1] - depth_range[0]) / \\\n                (num_depth_cls * (num_depth_cls - 1)) * \\\n                (split_pts * (split_pts+1))\n            prob_depth_preds = (F.softmax(depth_cls_preds.clone(), dim=-1) *\n                                depth_multiplier).sum(dim=-1)\n            return prob_depth_preds\n        elif division == 'log':\n            split_pts = depth_cls_preds.new_tensor(list(\n                range(num_depth_cls))).reshape([1, -1])\n            start = max(depth_range[0], 1)\n            end = depth_range[1]\n            depth_multiplier = (np.log(start) +\n                                split_pts * np.log(end / start) /\n                                (num_depth_cls - 1)).exp()\n            prob_depth_preds = (F.softmax(depth_cls_preds.clone(), dim=-1) *\n                                depth_multiplier).sum(dim=-1)\n            return prob_depth_preds\n        elif division == 'loguniform':\n            split_pts = depth_cls_preds.new_tensor(list(\n                range(num_depth_cls))).reshape([1, -1])\n            start = max(depth_range[0], 1)\n            end = depth_range[1]\n            log_multiplier = np.log(start) + \\\n                split_pts * np.log(end / start) / (num_depth_cls - 1)\n            prob_depth_preds = (F.softmax(depth_cls_preds.clone(), dim=-1) *\n                                log_multiplier).sum(dim=-1).exp()\n            return prob_depth_preds\n        else:\n            raise NotImplementedError\n"
  },
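`decode_prob_depth` above turns a vector of depth-classification logits into a scalar depth as a softmax-weighted expectation over bin centers. A minimal sketch of the 'uniform' division branch (the bin count and depth unit are illustrative):

```python
import torch
import torch.nn.functional as F

num_depth_cls = 8   # illustrative; taken from the head config in practice
depth_unit = 10     # each bin center is spaced 10 m apart under 'uniform' division

# raw logits for one location, shape [..., num_depth_cls]
depth_cls_preds = torch.tensor([[-2.0, -1.0, 0.5, 3.0, 0.5, -1.0, -2.0, -3.0]])

bin_centers = depth_unit * torch.arange(
    num_depth_cls, dtype=torch.float32).reshape(1, -1)
prob_depth = (F.softmax(depth_cls_preds, dim=-1) * bin_centers).sum(dim=-1)
print(prob_depth)  # the expectation lands near the dominant 30 m bin
```

The 'linear', 'log', and 'loguniform' branches only change how the bin centers are spaced; the softmax expectation itself is identical.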
  {
    "path": "mmdet3d/core/bbox/coders/point_xyzwhlr_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass PointXYZWHLRBBoxCoder(BaseBBoxCoder):\n    \"\"\"Point based bbox coder for 3D boxes.\n\n    Args:\n        code_size (int): The dimension of boxes to be encoded.\n        use_mean_size (bool, optional): Whether using anchors based on class.\n            Defaults to True.\n        mean_size (list[list[float]], optional): Mean size of bboxes in\n            each class. Defaults to None.\n    \"\"\"\n\n    def __init__(self, code_size=7, use_mean_size=True, mean_size=None):\n        super(PointXYZWHLRBBoxCoder, self).__init__()\n        self.code_size = code_size\n        self.use_mean_size = use_mean_size\n        if self.use_mean_size:\n            self.mean_size = torch.from_numpy(np.array(mean_size)).float()\n            assert self.mean_size.min() > 0, \\\n                f'The min of mean_size should > 0, however currently it is '\\\n                f'{self.mean_size.min()}, please check it in your config.'\n\n    def encode(self, gt_bboxes_3d, points, gt_labels_3d=None):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth bboxes\n                with shape (N, 7 + C).\n            points (torch.Tensor): Point cloud with shape (N, 3).\n            gt_labels_3d (torch.Tensor, optional): Ground truth classes.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: Encoded boxes with shape (N, 8 + C).\n        \"\"\"\n        gt_bboxes_3d[:, 3:6] = torch.clamp_min(gt_bboxes_3d[:, 3:6], min=1e-5)\n\n        xg, yg, zg, dxg, dyg, dzg, rg, *cgs = torch.split(\n            gt_bboxes_3d, 1, dim=-1)\n        xa, ya, za = torch.split(points, 1, dim=-1)\n\n        if self.use_mean_size:\n            assert gt_labels_3d.max() <= self.mean_size.shape[0] - 1, \\\n                f'the max gt label {gt_labels_3d.max()} is bigger than' \\\n                f'anchor types {self.mean_size.shape[0] - 1}.'\n            self.mean_size = self.mean_size.to(gt_labels_3d.device)\n            point_anchor_size = self.mean_size[gt_labels_3d]\n            dxa, dya, dza = torch.split(point_anchor_size, 1, dim=-1)\n            diagonal = torch.sqrt(dxa**2 + dya**2)\n            xt = (xg - xa) / diagonal\n            yt = (yg - ya) / diagonal\n            zt = (zg - za) / dza\n            dxt = torch.log(dxg / dxa)\n            dyt = torch.log(dyg / dya)\n            dzt = torch.log(dzg / dza)\n        else:\n            xt = (xg - xa)\n            yt = (yg - ya)\n            zt = (zg - za)\n            dxt = torch.log(dxg)\n            dyt = torch.log(dyg)\n            dzt = torch.log(dzg)\n\n        return torch.cat(\n            [xt, yt, zt, dxt, dyt, dzt,\n             torch.cos(rg),\n             torch.sin(rg), *cgs],\n            dim=-1)\n\n    def decode(self, box_encodings, points, pred_labels_3d=None):\n        \"\"\"Decode predicted parts and points to bbox3d.\n\n        Args:\n            box_encodings (torch.Tensor): Encoded boxes with shape (N, 8 + C).\n            points (torch.Tensor): Point cloud with shape (N, 3).\n            pred_labels_3d (torch.Tensor): Bbox predicted labels (N, M).\n\n        Returns:\n            torch.Tensor: Decoded boxes with shape (N, 7 + C)\n        \"\"\"\n        xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(\n   
         box_encodings, 1, dim=-1)\n        xa, ya, za = torch.split(points, 1, dim=-1)\n\n        if self.use_mean_size:\n            assert pred_labels_3d.max() <= self.mean_size.shape[0] - 1, \\\n                f'The max pred label {pred_labels_3d.max()} is bigger than' \\\n                f'anchor types {self.mean_size.shape[0] - 1}.'\n            self.mean_size = self.mean_size.to(pred_labels_3d.device)\n            point_anchor_size = self.mean_size[pred_labels_3d]\n            dxa, dya, dza = torch.split(point_anchor_size, 1, dim=-1)\n            diagonal = torch.sqrt(dxa**2 + dya**2)\n            xg = xt * diagonal + xa\n            yg = yt * diagonal + ya\n            zg = zt * dza + za\n\n            dxg = torch.exp(dxt) * dxa\n            dyg = torch.exp(dyt) * dya\n            dzg = torch.exp(dzt) * dza\n        else:\n            xg = xt + xa\n            yg = yt + ya\n            zg = zt + za\n            dxg, dyg, dzg = torch.split(\n                torch.exp(box_encodings[..., 3:6]), 1, dim=-1)\n\n        rg = torch.atan2(sint, cost)\n\n        return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cts], dim=-1)\n"
  },
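The encode/decode pair above is invertible: center offsets are taken relative to the sampled point, sizes are log-encoded, and yaw is stored as (cos, sin). A small round-trip sketch of the `use_mean_size=False` branch with toy values (not the class-mean-anchor path):

```python
import torch

# one ground-truth box: (x, y, z, dx, dy, dz, yaw)
gt_box = torch.tensor([[4.0, 2.0, -1.0, 3.9, 1.6, 1.5, 0.3]])
point = torch.tensor([[3.5, 2.5, -1.2]])  # point assigned to this box

# encode: residual center, log sizes, cos/sin of yaw
xt, yt, zt = (gt_box[:, :3] - point).split(1, dim=-1)
dxt, dyt, dzt = torch.log(gt_box[:, 3:6]).split(1, dim=-1)
code = torch.cat([xt, yt, zt, dxt, dyt, dzt,
                  torch.cos(gt_box[:, 6:7]), torch.sin(gt_box[:, 6:7])], dim=-1)

# decode back to a box
center = code[:, :3] + point
dims = torch.exp(code[:, 3:6])
yaw = torch.atan2(code[:, 7:8], code[:, 6:7])
decoded = torch.cat([center, dims, yaw], dim=-1)
print(torch.allclose(decoded, gt_box))  # True
```

With `use_mean_size=True`, the only difference is that the center residual is divided by the BEV diagonal of the class-mean anchor and the sizes are log-ratios against that anchor.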
  {
    "path": "mmdet3d/core/bbox/coders/smoke_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass SMOKECoder(BaseBBoxCoder):\n    \"\"\"Bbox Coder for SMOKE.\n\n    Args:\n        base_depth (tuple[float]): Depth references for decode box depth.\n        base_dims (tuple[tuple[float]]): Dimension references [l, h, w]\n            for decode box dimension for each category.\n        code_size (int): The dimension of boxes to be encoded.\n    \"\"\"\n\n    def __init__(self, base_depth, base_dims, code_size):\n        super(SMOKECoder, self).__init__()\n        self.base_depth = base_depth\n        self.base_dims = base_dims\n        self.bbox_code_size = code_size\n\n    def encode(self, locations, dimensions, orientations, input_metas):\n        \"\"\"Encode CameraInstance3DBoxes by locations, dimensions, orientations.\n\n        Args:\n            locations (Tensor): Center location for 3D boxes.\n                (N, 3)\n            dimensions (Tensor): Dimensions for 3D boxes.\n                shape (N, 3)\n            orientations (Tensor): Orientations for 3D boxes.\n                shape (N, 1)\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Return:\n            :obj:`CameraInstance3DBoxes`: 3D bboxes of batch images,\n                shape (N, bbox_code_size).\n        \"\"\"\n\n        bboxes = torch.cat((locations, dimensions, orientations), dim=1)\n        assert bboxes.shape[1] == self.bbox_code_size, 'bboxes shape dose not'\\\n            'match the bbox_code_size.'\n        batch_bboxes = input_metas[0]['box_type_3d'](\n            bboxes, box_dim=self.bbox_code_size)\n\n        return batch_bboxes\n\n    def decode(self,\n               reg,\n               points,\n               labels,\n               cam2imgs,\n               trans_mats,\n               locations=None):\n        \"\"\"Decode regression into locations, dimensions, orientations.\n\n        Args:\n            reg (Tensor): Batch regression for each predict center2d point.\n                shape: (batch * K (max_objs), C)\n            points(Tensor): Batch projected bbox centers on image plane.\n                shape: (batch * K (max_objs) , 2)\n            labels (Tensor): Batch predict class label for each predict\n                center2d point.\n                shape: (batch, K (max_objs))\n            cam2imgs (Tensor): Batch images' camera intrinsic matrix.\n                shape: kitti (batch, 4, 4)  nuscenes (batch, 3, 3)\n            trans_mats (Tensor): transformation matrix from original image\n                to feature map.\n                shape: (batch, 3, 3)\n            locations (None | Tensor): if locations is None, this function\n                is used to decode while inference, otherwise, it's used while\n                training using the ground truth 3d bbox locations.\n                shape: (batch * K (max_objs), 3)\n\n        Return:\n            tuple(Tensor): The tuple has components below:\n                - locations (Tensor): Centers of 3D boxes.\n                    shape: (batch * K (max_objs), 3)\n                - dimensions (Tensor): Dimensions of 3D boxes.\n                    shape: (batch * K (max_objs), 3)\n                - orientations (Tensor): Orientations of 3D\n                    boxes.\n                    shape: (batch * K (max_objs), 
1)\n        \"\"\"\n        depth_offsets = reg[:, 0]\n        centers2d_offsets = reg[:, 1:3]\n        dimensions_offsets = reg[:, 3:6]\n        orientations = reg[:, 6:8]\n        depths = self._decode_depth(depth_offsets)\n        # get the 3D Bounding box's center location.\n        pred_locations = self._decode_location(points, centers2d_offsets,\n                                               depths, cam2imgs, trans_mats)\n        pred_dimensions = self._decode_dimension(labels, dimensions_offsets)\n        if locations is None:\n            pred_orientations = self._decode_orientation(\n                orientations, pred_locations)\n        else:\n            pred_orientations = self._decode_orientation(\n                orientations, locations)\n\n        return pred_locations, pred_dimensions, pred_orientations\n\n    def _decode_depth(self, depth_offsets):\n        \"\"\"Transform depth offset to depth.\"\"\"\n        base_depth = depth_offsets.new_tensor(self.base_depth)\n        depths = depth_offsets * base_depth[1] + base_depth[0]\n\n        return depths\n\n    def _decode_location(self, points, centers2d_offsets, depths, cam2imgs,\n                         trans_mats):\n        \"\"\"Retrieve objects location in camera coordinate based on projected\n        points.\n\n        Args:\n            points (Tensor): Projected points on feature map in (x, y)\n                shape: (batch * K, 2)\n            centers2d_offset (Tensor): Project points offset in\n                (delta_x, delta_y). shape: (batch * K, 2)\n            depths (Tensor): Object depth z.\n                shape: (batch * K)\n            cam2imgs (Tensor): Batch camera intrinsics matrix.\n                shape: kitti (batch, 4, 4)  nuscenes (batch, 3, 3)\n            trans_mats (Tensor): transformation matrix from original image\n                to feature map.\n                shape: (batch, 3, 3)\n        \"\"\"\n        # number of points\n        N = centers2d_offsets.shape[0]\n        # batch_size\n        N_batch = cam2imgs.shape[0]\n        batch_id = torch.arange(N_batch).unsqueeze(1)\n        obj_id = batch_id.repeat(1, N // N_batch).flatten()\n        trans_mats_inv = trans_mats.inverse()[obj_id]\n        cam2imgs_inv = cam2imgs.inverse()[obj_id]\n        centers2d = points + centers2d_offsets\n        centers2d_extend = torch.cat((centers2d, centers2d.new_ones(N, 1)),\n                                     dim=1)\n        # expand project points as [N, 3, 1]\n        centers2d_extend = centers2d_extend.unsqueeze(-1)\n        # transform project points back on original image\n        centers2d_img = torch.matmul(trans_mats_inv, centers2d_extend)\n        centers2d_img = centers2d_img * depths.view(N, -1, 1)\n        if cam2imgs.shape[1] == 4:\n            centers2d_img = torch.cat(\n                (centers2d_img, centers2d.new_ones(N, 1, 1)), dim=1)\n        locations = torch.matmul(cam2imgs_inv, centers2d_img).squeeze(2)\n\n        return locations[:, :3]\n\n    def _decode_dimension(self, labels, dims_offset):\n        \"\"\"Transform dimension offsets to dimension according to its category.\n\n        Args:\n            labels (Tensor): Each points' category id.\n                shape: (N, K)\n            dims_offset (Tensor): Dimension offsets.\n                shape: (N, 3)\n        \"\"\"\n        labels = labels.flatten().long()\n        base_dims = dims_offset.new_tensor(self.base_dims)\n        dims_select = base_dims[labels, :]\n        dimensions = dims_offset.exp() * dims_select\n\n   
     return dimensions\n\n    def _decode_orientation(self, ori_vector, locations):\n        \"\"\"Retrieve object orientation.\n\n        Args:\n            ori_vector (Tensor): Local orientation in [sin, cos] format.\n                shape: (N, 2)\n            locations (Tensor): Object location.\n                shape: (N, 3)\n\n        Return:\n            Tensor: yaw (Orientation). Notice that the yaw's\n                range is [-np.pi, np.pi].\n                shape: (N, 1)\n        \"\"\"\n        assert len(ori_vector) == len(locations)\n        locations = locations.view(-1, 3)\n        rays = torch.atan(locations[:, 0] / (locations[:, 2] + 1e-7))\n        alphas = torch.atan(ori_vector[:, 0] / (ori_vector[:, 1] + 1e-7))\n\n        # get cosine value positive and negative index.\n        cos_pos_inds = (ori_vector[:, 1] >= 0).nonzero(as_tuple=False)\n        cos_neg_inds = (ori_vector[:, 1] < 0).nonzero(as_tuple=False)\n\n        alphas[cos_pos_inds] -= np.pi / 2\n        alphas[cos_neg_inds] += np.pi / 2\n        # retrieve object rotation y angle.\n        yaws = alphas + rays\n\n        larger_inds = (yaws > np.pi).nonzero(as_tuple=False)\n        small_inds = (yaws < -np.pi).nonzero(as_tuple=False)\n\n        if len(larger_inds) != 0:\n            yaws[larger_inds] -= 2 * np.pi\n        if len(small_inds) != 0:\n            yaws[small_inds] += 2 * np.pi\n\n        yaws = yaws.unsqueeze(-1)\n        return yaws\n"
  },
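`_decode_orientation` above recovers the global yaw from the local orientation (alpha) by adding the viewing-ray angle of the object and wrapping the result to [-pi, pi]. A standalone sketch of that final conversion with toy values (the [sin, cos] handling of the raw head output is omitted here):

```python
import numpy as np
import torch

# toy object locations (x, y, z in camera coordinates) and local yaws (alpha)
locations = torch.tensor([[5.0, 0.0, 20.0],
                          [-8.0, 0.0, 15.0]])
alphas = torch.tensor([3.0, -3.0])

# viewing-ray angle from the camera to each object
rays = torch.atan(locations[:, 0] / (locations[:, 2] + 1e-7))
yaws = alphas + rays

# wrap back into [-pi, pi]
yaws[yaws > np.pi] -= 2 * np.pi
yaws[yaws < -np.pi] += 2 * np.pi
print(yaws)
```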
  {
    "path": "mmdet3d/core/bbox/iou_calculators/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,\n                               BboxOverlapsNearest3D,\n                               axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,\n                               bbox_overlaps_nearest_3d)\n\n__all__ = [\n    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',\n    'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',\n    'axis_aligned_bbox_overlaps_3d'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.bbox import bbox_overlaps\nfrom mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS\nfrom ..structures import get_box_type\n\n\n@IOU_CALCULATORS.register_module()\nclass BboxOverlapsNearest3D(object):\n    \"\"\"Nearest 3D IoU Calculator.\n\n    Note:\n        This IoU calculator first finds the nearest 2D boxes in bird eye view\n        (BEV), and then calculates the 2D IoU using :meth:`bbox_overlaps`.\n\n    Args:\n        coordinate (str): 'camera', 'lidar', or 'depth' coordinate system.\n    \"\"\"\n\n    def __init__(self, coordinate='lidar'):\n        assert coordinate in ['camera', 'lidar', 'depth']\n        self.coordinate = coordinate\n\n    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):\n        \"\"\"Calculate nearest 3D IoU.\n\n        Note:\n            If ``is_aligned`` is ``False``, then it calculates the ious between\n            each bbox of bboxes1 and bboxes2, otherwise it calculates the ious\n            between each aligned pair of bboxes1 and bboxes2.\n\n        Args:\n            bboxes1 (torch.Tensor): shape (N, 7+N)\n                [x, y, z, x_size, y_size, z_size, ry, v].\n            bboxes2 (torch.Tensor): shape (M, 7+N)\n                [x, y, z, x_size, y_size, z_size, ry, v].\n            mode (str): \"iou\" (intersection over union) or iof\n                (intersection over foreground).\n            is_aligned (bool): Whether the calculation is aligned.\n\n        Return:\n            torch.Tensor: If ``is_aligned`` is ``True``, return ious between\n                bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is\n                ``False``, return shape is M.\n        \"\"\"\n        return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned,\n                                        self.coordinate)\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(coordinate={self.coordinate}'\n        return repr_str\n\n\n@IOU_CALCULATORS.register_module()\nclass BboxOverlaps3D(object):\n    \"\"\"3D IoU Calculator.\n\n    Args:\n        coordinate (str): The coordinate system, valid options are\n            'camera', 'lidar', and 'depth'.\n    \"\"\"\n\n    def __init__(self, coordinate):\n        assert coordinate in ['camera', 'lidar', 'depth']\n        self.coordinate = coordinate\n\n    def __call__(self, bboxes1, bboxes2, mode='iou'):\n        \"\"\"Calculate 3D IoU using cuda implementation.\n\n        Note:\n            This function calculate the IoU of 3D boxes based on their volumes.\n            IoU calculator ``:class:BboxOverlaps3D`` uses this function to\n            calculate the actual 3D IoUs of boxes.\n\n        Args:\n            bboxes1 (torch.Tensor): with shape (N, 7+C),\n                (x, y, z, x_size, y_size, z_size, ry, v*).\n            bboxes2 (torch.Tensor): with shape (M, 7+C),\n                (x, y, z, x_size, y_size, z_size, ry, v*).\n            mode (str): \"iou\" (intersection over union) or\n                iof (intersection over foreground).\n\n        Return:\n            torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2\n                with shape (M, N) (aligned mode is not supported currently).\n        \"\"\"\n        return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)\n\n    def __repr__(self):\n        \"\"\"str: return a string that describes 
the module\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(coordinate={self.coordinate}'\n        return repr_str\n\n\ndef bbox_overlaps_nearest_3d(bboxes1,\n                             bboxes2,\n                             mode='iou',\n                             is_aligned=False,\n                             coordinate='lidar'):\n    \"\"\"Calculate nearest 3D IoU.\n\n    Note:\n        This function first finds the nearest 2D boxes in bird eye view\n        (BEV), and then calculates the 2D IoU using :meth:`bbox_overlaps`.\n        This IoU calculator :class:`BboxOverlapsNearest3D` uses this\n        function to calculate IoUs of boxes.\n\n        If ``is_aligned`` is ``False``, then it calculates the ious between\n        each bbox of bboxes1 and bboxes2, otherwise the ious between each\n        aligned pair of bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (torch.Tensor): with shape (N, 7+C),\n            (x, y, z, x_size, y_size, z_size, ry, v*).\n        bboxes2 (torch.Tensor): with shape (M, 7+C),\n            (x, y, z, x_size, y_size, z_size, ry, v*).\n        mode (str): \"iou\" (intersection over union) or iof\n            (intersection over foreground).\n        is_aligned (bool): Whether the calculation is aligned\n\n    Return:\n        torch.Tensor: If ``is_aligned`` is ``True``, return ious between\n            bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is\n            ``False``, return shape is M.\n    \"\"\"\n    assert bboxes1.size(-1) == bboxes2.size(-1) >= 7\n\n    box_type, _ = get_box_type(coordinate)\n\n    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])\n    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])\n\n    # Change the bboxes to bev\n    # box conversion and iou calculation in torch version on CUDA\n    # is 10x faster than that in numpy version\n    bboxes1_bev = bboxes1.nearest_bev\n    bboxes2_bev = bboxes2.nearest_bev\n\n    ret = bbox_overlaps(\n        bboxes1_bev, bboxes2_bev, mode=mode, is_aligned=is_aligned)\n    return ret\n\n\ndef bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):\n    \"\"\"Calculate 3D IoU using cuda implementation.\n\n    Note:\n        This function calculates the IoU of 3D boxes based on their volumes.\n        IoU calculator :class:`BboxOverlaps3D` uses this function to\n        calculate the actual IoUs of boxes.\n\n    Args:\n        bboxes1 (torch.Tensor): with shape (N, 7+C),\n            (x, y, z, x_size, y_size, z_size, ry, v*).\n        bboxes2 (torch.Tensor): with shape (M, 7+C),\n            (x, y, z, x_size, y_size, z_size, ry, v*).\n        mode (str): \"iou\" (intersection over union) or\n            iof (intersection over foreground).\n        coordinate (str): 'camera' or 'lidar' coordinate system.\n\n    Return:\n        torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2\n            with shape (M, N) (aligned mode is not supported currently).\n    \"\"\"\n    assert bboxes1.size(-1) == bboxes2.size(-1) >= 7\n\n    box_type, _ = get_box_type(coordinate)\n\n    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])\n    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])\n\n    return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)\n\n\n@IOU_CALCULATORS.register_module()\nclass AxisAlignedBboxOverlaps3D(object):\n    \"\"\"Axis-aligned 3D Overlaps (IoU) Calculator.\"\"\"\n\n    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):\n        \"\"\"Calculate IoU between 2D bboxes.\n\n        Args:\n            
bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>\n                format or empty.\n            bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>\n                format or empty.\n                B indicates the batch dim, in shape (B1, B2, ..., Bn).\n                If ``is_aligned`` is ``True``, then m and n must be equal.\n            mode (str): \"iou\" (intersection over union) or \"giou\" (generalized\n                intersection over union).\n            is_aligned (bool, optional): If True, then m and n must be equal.\n                Defaults to False.\n        Returns:\n            Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)\n        \"\"\"\n        assert bboxes1.size(-1) == bboxes2.size(-1) == 6\n        return axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2, mode,\n                                             is_aligned)\n\n    def __repr__(self):\n        \"\"\"str: a string describing the module\"\"\"\n        repr_str = self.__class__.__name__ + '()'\n        return repr_str\n\n\ndef axis_aligned_bbox_overlaps_3d(bboxes1,\n                                  bboxes2,\n                                  mode='iou',\n                                  is_aligned=False,\n                                  eps=1e-6):\n    \"\"\"Calculate overlap between two set of axis aligned 3D bboxes. If\n    ``is_aligned`` is ``False``, then calculate the overlaps between each bbox\n    of bboxes1 and bboxes2, otherwise the overlaps between each aligned pair of\n    bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>\n            format or empty.\n        bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>\n            format or empty.\n            B indicates the batch dim, in shape (B1, B2, ..., Bn).\n            If ``is_aligned`` is ``True``, then m and n must be equal.\n        mode (str): \"iou\" (intersection over union) or \"giou\" (generalized\n            intersection over union).\n        is_aligned (bool, optional): If True, then m and n must be equal.\n            Defaults to False.\n        eps (float, optional): A value added to the denominator for numerical\n            stability. 
Defaults to 1e-6.\n\n    Returns:\n        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)\n\n    Example:\n        >>> bboxes1 = torch.FloatTensor([\n        >>>     [0, 0, 0, 10, 10, 10],\n        >>>     [10, 10, 10, 20, 20, 20],\n        >>>     [32, 32, 32, 38, 40, 42],\n        >>> ])\n        >>> bboxes2 = torch.FloatTensor([\n        >>>     [0, 0, 0, 10, 20, 20],\n        >>>     [0, 10, 10, 10, 19, 20],\n        >>>     [10, 10, 10, 20, 20, 20],\n        >>> ])\n        >>> overlaps = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2)\n        >>> assert overlaps.shape == (3, 3)\n        >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)\n        >>> assert overlaps.shape == (3, )\n    Example:\n        >>> empty = torch.empty(0, 6)\n        >>> nonempty = torch.FloatTensor([[0, 0, 0, 10, 9, 10]])\n        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)\n        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)\n        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)\n    \"\"\"\n\n    assert mode in ['iou', 'giou'], f'Unsupported mode {mode}'\n    # Either the boxes are empty or the length of boxes's last dimension is 6\n    assert (bboxes1.size(-1) == 6 or bboxes1.size(0) == 0)\n    assert (bboxes2.size(-1) == 6 or bboxes2.size(0) == 0)\n\n    # Batch dim must be the same\n    # Batch dim: (B1, B2, ... Bn)\n    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]\n    batch_shape = bboxes1.shape[:-2]\n\n    rows = bboxes1.size(-2)\n    cols = bboxes2.size(-2)\n    if is_aligned:\n        assert rows == cols\n\n    if rows * cols == 0:\n        if is_aligned:\n            return bboxes1.new(batch_shape + (rows, ))\n        else:\n            return bboxes1.new(batch_shape + (rows, cols))\n\n    area1 = (bboxes1[..., 3] -\n             bboxes1[..., 0]) * (bboxes1[..., 4] - bboxes1[..., 1]) * (\n                 bboxes1[..., 5] - bboxes1[..., 2])\n    area2 = (bboxes2[..., 3] -\n             bboxes2[..., 0]) * (bboxes2[..., 4] - bboxes2[..., 1]) * (\n                 bboxes2[..., 5] - bboxes2[..., 2])\n\n    if is_aligned:\n        lt = torch.max(bboxes1[..., :3], bboxes2[..., :3])  # [B, rows, 3]\n        rb = torch.min(bboxes1[..., 3:], bboxes2[..., 3:])  # [B, rows, 3]\n\n        wh = (rb - lt).clamp(min=0)  # [B, rows, 2]\n        overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]\n\n        if mode in ['iou', 'giou']:\n            union = area1 + area2 - overlap\n        else:\n            union = area1\n        if mode == 'giou':\n            enclosed_lt = torch.min(bboxes1[..., :3], bboxes2[..., :3])\n            enclosed_rb = torch.max(bboxes1[..., 3:], bboxes2[..., 3:])\n    else:\n        lt = torch.max(bboxes1[..., :, None, :3],\n                       bboxes2[..., None, :, :3])  # [B, rows, cols, 3]\n        rb = torch.min(bboxes1[..., :, None, 3:],\n                       bboxes2[..., None, :, 3:])  # [B, rows, cols, 3]\n\n        wh = (rb - lt).clamp(min=0)  # [B, rows, cols, 3]\n        overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]\n\n        if mode in ['iou', 'giou']:\n            union = area1[..., None] + area2[..., None, :] - overlap\n        if mode == 'giou':\n            enclosed_lt = torch.min(bboxes1[..., :, None, :3],\n                                    bboxes2[..., None, :, :3])\n            enclosed_rb = torch.max(bboxes1[..., :, None, 3:],\n                                    bboxes2[..., None, :, 3:])\n\n    eps = union.new_tensor([eps])\n    union = torch.max(union, eps)\n    ious = 
overlap / union\n    if mode in ['iou']:\n        return ious\n    # calculate gious\n    enclose_wh = (enclosed_rb - enclosed_lt).clamp(min=0)\n    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] * enclose_wh[..., 2]\n    enclose_area = torch.max(enclose_area, eps)\n    gious = ious - (enclose_area - union) / enclose_area\n    return gious\n"
  },
  {
    "path": "mmdet3d/core/bbox/samplers/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,\n                                      InstanceBalancedPosSampler,\n                                      IoUBalancedNegSampler, OHEMSampler,\n                                      PseudoSampler, RandomSampler,\n                                      SamplingResult)\nfrom .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler\n\n__all__ = [\n    'BaseSampler', 'PseudoSampler', 'RandomSampler',\n    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',\n    'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.bbox.builder import BBOX_SAMPLERS\nfrom . import RandomSampler, SamplingResult\n\n\n@BBOX_SAMPLERS.register_module()\nclass IoUNegPiecewiseSampler(RandomSampler):\n    \"\"\"IoU Piece-wise Sampling.\n\n    Sampling negative proposals according to a list of IoU thresholds.\n    The negative proposals are divided into several pieces according\n    to `neg_iou_piece_thrs`. And the ratio of each piece is indicated\n    by `neg_piece_fractions`.\n\n    Args:\n        num (int): Number of proposals.\n        pos_fraction (float): The fraction of positive proposals.\n        neg_piece_fractions (list): A list contains fractions that indicates\n            the ratio of each piece of total negative samplers.\n        neg_iou_piece_thrs (list): A list contains IoU thresholds that\n            indicate the upper bound of this piece.\n        neg_pos_ub (float): The total ratio to limit the upper bound\n            number of negative samples.\n        add_gt_as_proposals (bool): Whether to add gt as proposals.\n    \"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction=None,\n                 neg_piece_fractions=None,\n                 neg_iou_piece_thrs=None,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=False,\n                 return_iou=False):\n        super(IoUNegPiecewiseSampler,\n              self).__init__(num, pos_fraction, neg_pos_ub,\n                             add_gt_as_proposals)\n        assert isinstance(neg_piece_fractions, list)\n        assert len(neg_piece_fractions) == len(neg_iou_piece_thrs)\n        self.neg_piece_fractions = neg_piece_fractions\n        self.neg_iou_thr = neg_iou_piece_thrs\n        self.return_iou = return_iou\n        self.neg_piece_num = len(self.neg_piece_fractions)\n\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some positive samples.\"\"\"\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.random_choice(pos_inds, num_expected)\n\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some negative samples.\"\"\"\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= 0:\n            return neg_inds.squeeze(1)\n        else:\n            neg_inds_choice = neg_inds.new_zeros([0])\n            extend_num = 0\n            max_overlaps = assign_result.max_overlaps[neg_inds]\n\n            for piece_inds in range(self.neg_piece_num):\n                if piece_inds == self.neg_piece_num - 1:  # for the last piece\n                    piece_expected_num = num_expected - len(neg_inds_choice)\n                    min_iou_thr = 0\n                else:\n                    # if the numbers of negative samplers in previous\n                    # pieces are less than the expected number, extend\n                    # the same number in the current piece.\n                    piece_expected_num = int(\n                        num_expected *\n                        self.neg_piece_fractions[piece_inds]) + extend_num\n                    min_iou_thr = self.neg_iou_thr[piece_inds + 
1]\n                max_iou_thr = self.neg_iou_thr[piece_inds]\n                piece_neg_inds = torch.nonzero(\n                    (max_overlaps >= min_iou_thr)\n                    & (max_overlaps < max_iou_thr),\n                    as_tuple=False).view(-1)\n\n                if len(piece_neg_inds) < piece_expected_num:\n                    neg_inds_choice = torch.cat(\n                        [neg_inds_choice, neg_inds[piece_neg_inds]], dim=0)\n                    extend_num += piece_expected_num - len(piece_neg_inds)\n\n                    # for the last piece\n                    if piece_inds == self.neg_piece_num - 1:\n                        extend_neg_num = num_expected - len(neg_inds_choice)\n                        # if the number of negative samples > 0, we will\n                        # randomly select the remaining samples from the last piece\n                        if piece_neg_inds.numel() > 0:\n                            rand_idx = torch.randint(\n                                low=0,\n                                high=piece_neg_inds.numel(),\n                                size=(extend_neg_num, )).long()\n                            neg_inds_choice = torch.cat(\n                                [neg_inds_choice, piece_neg_inds[rand_idx]],\n                                dim=0)\n                        # if the number of negative samples == 0, we will\n                        # randomly select the remaining samples from all\n                        # previous pieces\n                        else:\n                            rand_idx = torch.randint(\n                                low=0,\n                                high=neg_inds_choice.numel(),\n                                size=(extend_neg_num, )).long()\n                            neg_inds_choice = torch.cat(\n                                [neg_inds_choice, neg_inds_choice[rand_idx]],\n                                dim=0)\n                else:\n                    piece_choice = self.random_choice(piece_neg_inds,\n                                                      piece_expected_num)\n                    neg_inds_choice = torch.cat(\n                        [neg_inds_choice, neg_inds[piece_choice]], dim=0)\n                    extend_num = 0\n            assert len(neg_inds_choice) == num_expected\n            return neg_inds_choice\n\n    def sample(self,\n               assign_result,\n               bboxes,\n               gt_bboxes,\n               gt_labels=None,\n               **kwargs):\n        \"\"\"Sample positive and negative bboxes.\n\n        This is a simple implementation of bbox sampling given candidates,\n        assigning results and ground truth bboxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Bbox assigning results.\n            bboxes (torch.Tensor): Boxes to be sampled from.\n            gt_bboxes (torch.Tensor): Ground truth bboxes.\n            gt_labels (torch.Tensor, optional): Class labels of ground truth\n                bboxes.\n\n        Returns:\n            :obj:`SamplingResult`: Sampling result.\n        \"\"\"\n        if len(bboxes.shape) < 2:\n            bboxes = bboxes[None, :]\n\n        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.bool)\n        if self.add_gt_as_proposals and len(gt_bboxes) > 0:\n            if gt_labels is None:\n                raise ValueError(\n                    'gt_labels must be given when add_gt_as_proposals is True')\n            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)\n            assign_result.add_gt_(gt_labels)\n            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.bool)\n            gt_flags = torch.cat([gt_ones, gt_flags])\n\n        num_expected_pos = int(self.num * self.pos_fraction)\n        pos_inds = self.pos_sampler._sample_pos(\n            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)\n        # We found that sampled indices have duplicated items occasionally.\n        # (may be a bug of PyTorch)\n        pos_inds = pos_inds.unique()\n        num_sampled_pos = pos_inds.numel()\n        num_expected_neg = self.num - num_sampled_pos\n        if self.neg_pos_ub >= 0:\n            _pos = max(1, num_sampled_pos)\n            neg_upper_bound = int(self.neg_pos_ub * _pos)\n            if num_expected_neg > neg_upper_bound:\n                num_expected_neg = neg_upper_bound\n        neg_inds = self.neg_sampler._sample_neg(\n            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)\n\n        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                                         assign_result, gt_flags)\n        if self.return_iou:\n            # PartA2 needs the IoU score for regression.\n            sampling_result.iou = assign_result.max_overlaps[torch.cat(\n                [pos_inds, neg_inds])]\n            sampling_result.iou.detach_()\n\n        return sampling_result\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .box_3d_mode import Box3DMode\nfrom .cam_box3d import CameraInstance3DBoxes\nfrom .coord_3d_mode import Coord3DMode\nfrom .depth_box3d import DepthInstance3DBoxes\nfrom .lidar_box3d import LiDARInstance3DBoxes\nfrom .custom_box import CustomBox\nfrom .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,\n                    mono_cam_box2vis, points_cam2img, points_img2cam,\n                    rotation_3d_in_axis, xywhr2xyxyr)\n\n__all__ = [\n    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',\n    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',\n    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',\n    'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis',\n    'get_proj_mat_by_coord_type'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/base_box3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom abc import abstractmethod\n\nimport numpy as np\nimport torch\nfrom mmcv.ops import box_iou_rotated, points_in_boxes_all, points_in_boxes_part\n\nfrom .utils import limit_period\n\n\nclass BaseInstance3DBoxes(object):\n    \"\"\"Base class for 3D Boxes.\n\n    Note:\n        The box is bottom centered, i.e. the relative position of origin in\n        the box is (0.5, 0.5, 0).\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix.\n        box_dim (int): Number of the dimension of a box.\n            Each row is (x, y, z, x_size, y_size, z_size, yaw).\n            Defaults to 7.\n        with_yaw (bool): Whether the box is with yaw rotation.\n            If False, the value of yaw will be set to 0 as minmax boxes.\n            Defaults to True.\n        origin (tuple[float], optional): Relative position of the box origin.\n            Defaults to (0.5, 0.5, 0). This will guide the box be converted to\n            (0.5, 0.5, 0) mode.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicating the dimension of a box.\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n\n    def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):\n        if isinstance(tensor, torch.Tensor):\n            device = tensor.device\n        else:\n            device = torch.device('cpu')\n        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)\n        if tensor.numel() == 0:\n            # Use reshape, so we don't end up creating a new tensor that\n            # does not depend on the inputs (and consequently confuses jit)\n            tensor = tensor.reshape((0, box_dim)).to(\n                dtype=torch.float32, device=device)\n        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()\n\n        if tensor.shape[-1] == 6:\n            # If the dimension of boxes is 6, we expand box_dim by padding\n            # 0 as a fake yaw and set with_yaw to False.\n            assert box_dim == 6\n            fake_rot = tensor.new_zeros(tensor.shape[0], 1)\n            tensor = torch.cat((tensor, fake_rot), dim=-1)\n            self.box_dim = box_dim + 1\n            self.with_yaw = False\n        else:\n            self.box_dim = box_dim\n            self.with_yaw = with_yaw\n        self.tensor = tensor.clone()\n\n        if origin != (0.5, 0.5, 0):\n            dst = self.tensor.new_tensor((0.5, 0.5, 0))\n            src = self.tensor.new_tensor(origin)\n            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)\n\n    @property\n    def volume(self):\n        \"\"\"torch.Tensor: A vector with volume of each box.\"\"\"\n        return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]\n\n    @property\n    def dims(self):\n        \"\"\"torch.Tensor: Size dimensions of each box in shape (N, 3).\"\"\"\n        return self.tensor[:, 3:6]\n\n    @property\n    def yaw(self):\n        \"\"\"torch.Tensor: A vector with yaw of each box in shape (N, ).\"\"\"\n        return self.tensor[:, 6]\n\n    @property\n    def height(self):\n        \"\"\"torch.Tensor: A vector with height of each box in shape (N, ).\"\"\"\n        return self.tensor[:, 5]\n\n    @property\n    def top_height(self):\n        \"\"\"torch.Tensor:\n            A vector with 
the top height of each box in shape (N, ).\"\"\"\n        return self.bottom_height + self.height\n\n    @property\n    def bottom_height(self):\n        \"\"\"torch.Tensor:\n            A vector with bottom's height of each box in shape (N, ).\"\"\"\n        return self.tensor[:, 2]\n\n    @property\n    def center(self):\n        \"\"\"Calculate the center of all the boxes.\n\n        Note:\n            In MMDetection3D's convention, the bottom center is\n            usually taken as the default center.\n\n            The relative position of the centers in different kinds of\n            boxes are different, e.g., the relative center of a boxes is\n            (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.\n            It is recommended to use ``bottom_center`` or ``gravity_center``\n            for clearer usage.\n\n        Returns:\n            torch.Tensor: A tensor with center of each box in shape (N, 3).\n        \"\"\"\n        return self.bottom_center\n\n    @property\n    def bottom_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box in shape (N, 3).\"\"\"\n        return self.tensor[:, :3]\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box in shape (N, 3).\"\"\"\n        pass\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor:\n            a tensor with 8 corners of each box in shape (N, 8, 3).\"\"\"\n        pass\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: 2D BEV box of each box with rotation\n            in XYWHR format, in shape (N, 5).\"\"\"\n        return self.tensor[:, [0, 1, 3, 4, 6]]\n\n    @property\n    def nearest_bev(self):\n        \"\"\"torch.Tensor: A tensor of 2D BEV box of each box\n            without rotation.\"\"\"\n        # Obtain BEV boxes with rotation in XYWHR format\n        bev_rotated_boxes = self.bev\n        # convert the rotation to a valid range\n        rotations = bev_rotated_boxes[:, -1]\n        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))\n\n        # find the center of boxes\n        conditions = (normed_rotations > np.pi / 4)[..., None]\n        bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,\n                                                                [0, 1, 3, 2]],\n                                  bev_rotated_boxes[:, :4])\n\n        centers = bboxes_xywh[:, :2]\n        dims = bboxes_xywh[:, 2:]\n        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)\n        return bev_boxes\n\n    def in_range_bev(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): the range of box\n                (x_min, y_min, x_max, y_max)\n\n        Note:\n            The original implementation of SECOND checks whether boxes in\n            a range by checking whether the points are in a convex\n            polygon, we reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: Whether each box is inside the reference range.\n        \"\"\"\n        in_range_flags = ((self.bev[:, 0] > box_range[0])\n                          & (self.bev[:, 1] > box_range[1])\n                          & (self.bev[:, 0] < box_range[2])\n                          & (self.bev[:, 1] < box_range[3]))\n        return in_range_flags\n\n    @abstractmethod\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle or 
rotation\n        matrix.\n\n        Args:\n            angle (float | torch.Tensor | np.ndarray):\n                Rotation angle or rotation matrix.\n            points (torch.Tensor | numpy.ndarray |\n                :obj:`BasePoints`, optional):\n                Points to rotate. Defaults to None.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\n\n        Args:\n            bev_direction (str, optional): Direction by which to flip.\n                Can be chosen from 'horizontal' and 'vertical'.\n                Defaults to 'horizontal'.\n        \"\"\"\n        pass\n\n    def translate(self, trans_vector):\n        \"\"\"Translate boxes with the given translation vector.\n\n        Args:\n            trans_vector (torch.Tensor): Translation vector of size (1, 3).\n        \"\"\"\n        if not isinstance(trans_vector, torch.Tensor):\n            trans_vector = self.tensor.new_tensor(trans_vector)\n        self.tensor[:, :3] += trans_vector\n\n    def in_range_3d(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): The range of box\n                (x_min, y_min, z_min, x_max, y_max, z_max)\n\n        Note:\n            In the original implementation of SECOND, checking whether\n            a box in the range checks whether the points are in a convex\n            polygon, we try to reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: A binary vector indicating whether each box is\n                inside the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > box_range[0])\n                          & (self.tensor[:, 1] > box_range[1])\n                          & (self.tensor[:, 2] > box_range[2])\n                          & (self.tensor[:, 0] < box_range[3])\n                          & (self.tensor[:, 1] < box_range[4])\n                          & (self.tensor[:, 2] < box_range[5]))\n        return in_range_flags\n\n    @abstractmethod\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: The converted box of the same type\n                in the `dst` mode.\n        \"\"\"\n        pass\n\n    def scale(self, scale_factor):\n        \"\"\"Scale the box with horizontal and vertical scaling factors.\n\n        Args:\n            scale_factors (float): Scale factors to scale the boxes.\n        \"\"\"\n        self.tensor[:, :6] *= scale_factor\n        self.tensor[:, 7:] *= scale_factor  # velocity\n\n    def limit_yaw(self, offset=0.5, period=np.pi):\n        \"\"\"Limit the yaw to a given period and offset.\n\n        Args:\n            offset (float, optional): The offset of the yaw. Defaults to 0.5.\n            period (float, optional): The expected period. 
Defaults to np.pi.\n        \"\"\"\n        self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)\n\n    def nonempty(self, threshold=0.0):\n        \"\"\"Find boxes that are non-empty.\n\n        A box is considered empty,\n        if either of its side is no larger than threshold.\n\n        Args:\n            threshold (float, optional): The threshold of minimal sizes.\n                Defaults to 0.0.\n\n        Returns:\n            torch.Tensor: A binary vector which represents whether each\n                box is empty (False) or non-empty (True).\n        \"\"\"\n        box = self.tensor\n        size_x = box[..., 3]\n        size_y = box[..., 4]\n        size_z = box[..., 5]\n        keep = ((size_x > threshold)\n                & (size_y > threshold) & (size_z > threshold))\n        return keep\n\n    def __getitem__(self, item):\n        \"\"\"\n        Note:\n            The following usage are allowed:\n            1. `new_boxes = boxes[3]`:\n                return a `Boxes` that contains only one box.\n            2. `new_boxes = boxes[2:10]`:\n                return a slice of boxes.\n            3. `new_boxes = boxes[vector]`:\n                where vector is a torch.BoolTensor with `length = len(boxes)`.\n                Nonzero elements in the vector will be selected.\n            Note that the returned Boxes might share storage with this Boxes,\n            subject to Pytorch's indexing semantics.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: A new object of\n                :class:`BaseInstance3DBoxes` after indexing.\n        \"\"\"\n        original_type = type(self)\n        if isinstance(item, int):\n            return original_type(\n                self.tensor[item].view(1, -1),\n                box_dim=self.box_dim,\n                with_yaw=self.with_yaw)\n        b = self.tensor[item]\n        assert b.dim() == 2, \\\n            f'Indexing on Boxes with {item} failed to return a matrix!'\n        return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)\n\n    def __len__(self):\n        \"\"\"int: Number of boxes in the current object.\"\"\"\n        return self.tensor.shape[0]\n\n    def __repr__(self):\n        \"\"\"str: Return a strings that describes the object.\"\"\"\n        return self.__class__.__name__ + '(\\n    ' + str(self.tensor) + ')'\n\n    @classmethod\n    def cat(cls, boxes_list):\n        \"\"\"Concatenate a list of Boxes into a single Boxes.\n\n        Args:\n            boxes_list (list[:obj:`BaseInstance3DBoxes`]): List of boxes.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: The concatenated Boxes.\n        \"\"\"\n        assert isinstance(boxes_list, (list, tuple))\n        if len(boxes_list) == 0:\n            return cls(torch.empty(0))\n        assert all(isinstance(box, cls) for box in boxes_list)\n\n        # use torch.cat (v.s. 
layers.cat)\n        # so the returned boxes never share storage with input\n        cat_boxes = cls(\n            torch.cat([b.tensor for b in boxes_list], dim=0),\n            box_dim=boxes_list[0].tensor.shape[1],\n            with_yaw=boxes_list[0].with_yaw)\n        return cat_boxes\n\n    def to(self, device):\n        \"\"\"Convert current boxes to a specific device.\n\n        Args:\n            device (str | :obj:`torch.device`): The name of the device.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: A new boxes object on the\n                specific device.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.to(device),\n            box_dim=self.box_dim,\n            with_yaw=self.with_yaw)\n\n    def clone(self):\n        \"\"\"Clone the Boxes.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: Box object with the same properties\n                as self.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)\n\n    @property\n    def device(self):\n        \"\"\"str: The device of the boxes are on.\"\"\"\n        return self.tensor.device\n\n    def __iter__(self):\n        \"\"\"Yield a box as a Tensor of shape (4,) at a time.\n\n        Returns:\n            torch.Tensor: A box of shape (4,).\n        \"\"\"\n        yield from self.tensor\n\n    @classmethod\n    def height_overlaps(cls, boxes1, boxes2, mode='iou'):\n        \"\"\"Calculate height overlaps of two boxes.\n\n        Note:\n            This function calculates the height overlaps between boxes1 and\n            boxes2,  boxes1 and boxes2 should be in the same type.\n\n        Args:\n            boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.\n            boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.\n            mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.\n\n        Returns:\n            torch.Tensor: Calculated iou of boxes.\n        \"\"\"\n        assert isinstance(boxes1, BaseInstance3DBoxes)\n        assert isinstance(boxes2, BaseInstance3DBoxes)\n        assert type(boxes1) == type(boxes2), '\"boxes1\" and \"boxes2\" should' \\\n            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'\n\n        boxes1_top_height = boxes1.top_height.view(-1, 1)\n        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)\n        boxes2_top_height = boxes2.top_height.view(1, -1)\n        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)\n\n        heighest_of_bottom = torch.max(boxes1_bottom_height,\n                                       boxes2_bottom_height)\n        lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)\n        overlaps_h = torch.clamp(lowest_of_top - heighest_of_bottom, min=0)\n        return overlaps_h\n\n    @classmethod\n    def overlaps(cls, boxes1, boxes2, mode='iou'):\n        \"\"\"Calculate 3D overlaps of two boxes.\n\n        Note:\n            This function calculates the overlaps between ``boxes1`` and\n            ``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.\n\n        Args:\n            boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.\n            boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.\n            mode (str, optional): Mode of iou calculation. 
Defaults to 'iou'.\n\n        Returns:\n            torch.Tensor: Calculated 3D overlaps of the boxes.\n        \"\"\"\n        assert isinstance(boxes1, BaseInstance3DBoxes)\n        assert isinstance(boxes2, BaseInstance3DBoxes)\n        assert type(boxes1) == type(boxes2), '\"boxes1\" and \"boxes2\" should' \\\n            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'\n\n        assert mode in ['iou', 'iof']\n\n        rows = len(boxes1)\n        cols = len(boxes2)\n        if rows * cols == 0:\n            return boxes1.tensor.new(rows, cols)\n\n        # height overlap\n        overlaps_h = cls.height_overlaps(boxes1, boxes2)\n\n        # bev overlap\n        iou2d = box_iou_rotated(boxes1.bev, boxes2.bev)\n        areas1 = (boxes1.bev[:, 2] * boxes1.bev[:, 3]).unsqueeze(1).expand(\n            rows, cols)\n        areas2 = (boxes2.bev[:, 2] * boxes2.bev[:, 3]).unsqueeze(0).expand(\n            rows, cols)\n        overlaps_bev = iou2d * (areas1 + areas2) / (1 + iou2d)\n\n        # 3d overlaps\n        overlaps_3d = overlaps_bev.to(boxes1.device) * overlaps_h\n\n        volume1 = boxes1.volume.view(-1, 1)\n        volume2 = boxes2.volume.view(1, -1)\n\n        if mode == 'iou':\n            # the clamp func is used to avoid division of 0\n            iou3d = overlaps_3d / torch.clamp(\n                volume1 + volume2 - overlaps_3d, min=1e-8)\n        else:\n            iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)\n\n        return iou3d\n\n    def new_box(self, data):\n        \"\"\"Create a new box object with data.\n\n        The new box and its tensor has the similar properties\n            as self and self.tensor, respectively.\n\n        Args:\n            data (torch.Tensor | numpy.array | list): Data to be copied.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``,\n                the object's other properties are similar to ``self``.\n        \"\"\"\n        new_tensor = self.tensor.new_tensor(data) \\\n            if not isinstance(data, torch.Tensor) else data.to(self.device)\n        original_type = type(self)\n        return original_type(\n            new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)\n\n    def points_in_boxes_part(self, points, boxes_override=None):\n        \"\"\"Find the box in which each point is.\n\n        Args:\n            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),\n                3 dimensions are (x, y, z) in LiDAR or depth coordinate.\n            boxes_override (torch.Tensor, optional): Boxes to override\n                `self.tensor`. Defaults to None.\n\n        Returns:\n            torch.Tensor: The index of the first box that each point\n                is in, in shape (M, ). 
Default value is -1\n                (if the point is not enclosed by any box).\n\n        Note:\n            If a point is enclosed by multiple boxes, the index of the\n            first box will be returned.\n        \"\"\"\n        if boxes_override is not None:\n            boxes = boxes_override\n        else:\n            boxes = self.tensor\n        if points.dim() == 2:\n            points = points.unsqueeze(0)\n        box_idx = points_in_boxes_part(points,\n                                       boxes.unsqueeze(0).to(\n                                           points.device)).squeeze(0)\n        return box_idx\n\n    def points_in_boxes_all(self, points, boxes_override=None):\n        \"\"\"Find all boxes in which each point is.\n\n        Args:\n            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),\n                3 dimensions are (x, y, z) in LiDAR or depth coordinate.\n            boxes_override (torch.Tensor, optional): Boxes to override\n                `self.tensor`. Defaults to None.\n\n        Returns:\n            torch.Tensor: A tensor indicating whether a point is in a box,\n                in shape (M, T). T is the number of boxes. Denote this\n                tensor as A, if the m^th point is in the t^th box, then\n                `A[m, t] == 1`, elsewise `A[m, t] == 0`.\n        \"\"\"\n        if boxes_override is not None:\n            boxes = boxes_override\n        else:\n            boxes = self.tensor\n\n        points_clone = points.clone()[..., :3]\n        if points_clone.dim() == 2:\n            points_clone = points_clone.unsqueeze(0)\n        else:\n            assert points_clone.dim() == 3 and points_clone.shape[0] == 1\n\n        boxes = boxes.to(points_clone.device).unsqueeze(0)\n        box_idxs_of_pts = points_in_boxes_all(points_clone, boxes)\n\n        return box_idxs_of_pts.squeeze(0)\n\n    def points_in_boxes(self, points, boxes_override=None):\n        warnings.warn('DeprecationWarning: points_in_boxes is a '\n                      'deprecated method, please consider using '\n                      'points_in_boxes_part.')\n        return self.points_in_boxes_part(points, boxes_override)\n\n    def points_in_boxes_batch(self, points, boxes_override=None):\n        warnings.warn('DeprecationWarning: points_in_boxes_batch is a '\n                      'deprecated method, please consider using '\n                      'points_in_boxes_all.')\n        return self.points_in_boxes_all(points, boxes_override)\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/box_3d_mode.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom enum import IntEnum, unique\n\nimport numpy as np\nimport torch\n\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .cam_box3d import CameraInstance3DBoxes\nfrom .depth_box3d import DepthInstance3DBoxes\nfrom .lidar_box3d import LiDARInstance3DBoxes\nfrom .utils import limit_period\n\n\n@unique\nclass Box3DMode(IntEnum):\n    r\"\"\"Enum of different ways to represent a box.\n\n    Coordinates in LiDAR:\n\n    .. code-block:: none\n\n                    up z\n                       ^   x front\n                       |  /\n                       | /\n        left y <------ 0\n\n    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n\n    Coordinates in camera:\n\n    .. code-block:: none\n\n                z front\n               /\n              /\n             0 ------> x right\n             |\n             |\n             v\n        down y\n\n    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],\n    and the yaw is around the y axis, thus the rotation axis=1.\n\n    Coordinates in Depth mode:\n\n    .. code-block:: none\n\n        up z\n           ^   y front\n           |  /\n           | /\n           0 ------> x right\n\n    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    \"\"\"\n\n    LIDAR = 0\n    CAM = 1\n    DEPTH = 2\n\n    @staticmethod\n    def convert(box, src, dst, rt_mat=None, with_yaw=True):\n        \"\"\"Convert boxes from `src` mode to `dst` mode.\n\n        Args:\n            box (tuple | list | np.ndarray |\n                torch.Tensor | :obj:`BaseInstance3DBoxes`):\n                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.\n            src (:obj:`Box3DMode`): The src Box mode.\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n            with_yaw (bool, optional): If `box` is an instance of\n                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.\n                Defaults to True.\n\n        Returns:\n            (tuple | list | np.ndarray | torch.Tensor |\n                :obj:`BaseInstance3DBoxes`):\n                The converted box of the same type.\n        \"\"\"\n        if src == dst:\n            return box\n\n        is_numpy = isinstance(box, np.ndarray)\n        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)\n        single_box = isinstance(box, (list, tuple))\n        if single_box:\n            assert len(box) >= 7, (\n                'Box3DMode.convert takes either a k-tuple/list or '\n                'an Nxk array/tensor, where k >= 7')\n            arr = torch.tensor(box)[None, :]\n        else:\n            # avoid modifying the input box\n            if is_numpy:\n                arr = torch.from_numpy(np.asarray(box)).clone()\n            elif is_Instance3DBoxes:\n                arr = box.tensor.clone()\n            else:\n                arr = box.clone()\n\n        if is_Instance3DBoxes:\n            with_yaw = box.with_yaw\n\n        # convert box from `src` mode to `dst` mode.\n        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]\n        if with_yaw:\n            yaw = arr[..., 6:7]\n        if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n            if with_yaw:\n                yaw = -yaw - np.pi / 2\n                yaw = limit_period(yaw, period=np.pi * 2)\n        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n            if with_yaw:\n                yaw = -yaw - np.pi / 2\n                yaw = limit_period(yaw, period=np.pi * 2)\n        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n            if with_yaw:\n                yaw = -yaw\n        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n            if with_yaw:\n                yaw = -yaw\n        elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])\n            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)\n            if with_yaw:\n                yaw = yaw + np.pi / 2\n                yaw = limit_period(yaw, period=np.pi * 2)\n        elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])\n            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)\n            if with_yaw:\n                yaw = yaw - np.pi / 2\n                yaw = limit_period(yaw, period=np.pi * 2)\n        else:\n            raise NotImplementedError(\n                f'Conversion from 
Box3DMode {src} to {dst} '\n                'is not supported yet')\n\n        if not isinstance(rt_mat, torch.Tensor):\n            rt_mat = arr.new_tensor(rt_mat)\n        if rt_mat.size(1) == 4:\n            extended_xyz = torch.cat(\n                [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)\n            xyz = extended_xyz @ rt_mat.t()\n        else:\n            xyz = arr[..., :3] @ rt_mat.t()\n\n        if with_yaw:\n            remains = arr[..., 7:]\n            arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)\n        else:\n            remains = arr[..., 6:]\n            arr = torch.cat([xyz[..., :3], xyz_size, remains], dim=-1)\n\n        # convert arr to the original type\n        original_type = type(box)\n        if single_box:\n            return original_type(arr.flatten().tolist())\n        if is_numpy:\n            return arr.numpy()\n        elif is_Instance3DBoxes:\n            if dst == Box3DMode.CAM:\n                target_type = CameraInstance3DBoxes\n            elif dst == Box3DMode.LIDAR:\n                target_type = LiDARInstance3DBoxes\n            elif dst == Box3DMode.DEPTH:\n                target_type = DepthInstance3DBoxes\n            else:\n                raise NotImplementedError(\n                    f'Conversion to {dst} through {original_type}'\n                    ' is not supported yet')\n            return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)\n        else:\n            return arr\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/cam_box3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom ...points import BasePoints\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .utils import rotation_3d_in_axis, yaw2local\n\n\nclass CameraInstance3DBoxes(BaseInstance3DBoxes):\n    \"\"\"3D boxes of instances in CAM coordinates.\n\n    Coordinates in camera:\n\n    .. code-block:: none\n\n                z front (yaw=-0.5*pi)\n               /\n              /\n             0 ------> x right (yaw=0)\n             |\n             |\n             v\n        down y\n\n    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),\n    and the yaw is around the y axis, thus the rotation axis=1.\n    The yaw is 0 at the positive direction of x axis, and decreases from\n    the positive direction of x to the positive direction of z.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix in shape (N, box_dim).\n        box_dim (int): Integer indicating the dimension of a box\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as\n            axis-aligned boxes tightly enclosing the original boxes.\n    \"\"\"\n    YAW_AXIS = 1\n\n    def __init__(self,\n                 tensor,\n                 box_dim=7,\n                 with_yaw=True,\n                 origin=(0.5, 1.0, 0.5)):\n        if isinstance(tensor, torch.Tensor):\n            device = tensor.device\n        else:\n            device = torch.device('cpu')\n        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)\n        if tensor.numel() == 0:\n            # Use reshape, so we don't end up creating a new tensor that\n            # does not depend on the inputs (and consequently confuses jit)\n            tensor = tensor.reshape((0, box_dim)).to(\n                dtype=torch.float32, device=device)\n        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()\n\n        if tensor.shape[-1] == 6:\n            # If the dimension of boxes is 6, we expand box_dim by padding\n            # 0 as a fake yaw and set with_yaw to False.\n            assert box_dim == 6\n            fake_rot = tensor.new_zeros(tensor.shape[0], 1)\n            tensor = torch.cat((tensor, fake_rot), dim=-1)\n            self.box_dim = box_dim + 1\n            self.with_yaw = False\n        else:\n            self.box_dim = box_dim\n            self.with_yaw = with_yaw\n        self.tensor = tensor.clone()\n\n        if origin != (0.5, 1.0, 0.5):\n            dst = self.tensor.new_tensor((0.5, 1.0, 0.5))\n            src = self.tensor.new_tensor(origin)\n            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)\n\n    @property\n    def height(self):\n        \"\"\"torch.Tensor: A vector with height of each box in shape (N, ).\"\"\"\n        return self.tensor[:, 4]\n\n    @property\n    def top_height(self):\n        \"\"\"torch.Tensor:\n            A vector with the top height of each box in shape (N, ).\"\"\"\n        # the positive direction is down rather than up\n        return self.bottom_height - self.height\n\n    @property\n    def bottom_height(self):\n        \"\"\"torch.Tensor:\n            A vector with bottom's height of each box in shape (N, ).\"\"\"\n        return self.tensor[:, 1]\n\n    @property\n    def local_yaw(self):\n        \"\"\"torch.Tensor:\n            A vector with local yaw of each box in shape (N, ).\n            local_yaw equals to alpha in kitti, which is commonly\n  
          used in monocular 3D object detection task, so only\n            :obj:`CameraInstance3DBoxes` has the property.\n        \"\"\"\n        yaw = self.yaw\n        loc = self.gravity_center\n        local_yaw = yaw2local(yaw, loc)\n\n        return local_yaw\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box in shape (N, 3).\"\"\"\n        bottom_center = self.bottom_center\n        gravity_center = torch.zeros_like(bottom_center)\n        gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]\n        gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5\n        return gravity_center\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: Coordinates of corners of all the boxes in\n                         shape (N, 8, 3).\n\n        Convert the boxes to  in clockwise order, in the form of\n        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)\n\n        .. code-block:: none\n\n                         front z\n                              /\n                             /\n               (x0, y0, z1) + -----------  + (x1, y0, z1)\n                           /|            / |\n                          / |           /  |\n            (x0, y0, z0) + ----------- +   + (x1, y1, z1)\n                         |  /      .   |  /\n                         | / origin    | /\n            (x0, y1, z0) + ----------- + -------> x right\n                         |             (x1, y1, z0)\n                         |\n                         v\n                    down y\n        \"\"\"\n        if self.tensor.numel() == 0:\n            return torch.empty([0, 8, 3], device=self.tensor.device)\n\n        dims = self.dims\n        corners_norm = torch.from_numpy(\n            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(\n                device=dims.device, dtype=dims.dtype)\n\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n        # use relative origin [0.5, 1, 0.5]\n        corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])\n        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])\n\n        corners = rotation_3d_in_axis(\n            corners, self.tensor[:, 6], axis=self.YAW_AXIS)\n        corners += self.tensor[:, :3].view(-1, 1, 3)\n        return corners\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: 2D BEV box of each box with rotation\n            in XYWHR format, in shape (N, 5).\"\"\"\n        bev = self.tensor[:, [0, 2, 3, 5, 6]].clone()\n        # positive direction of the gravity axis\n        # in cam coord system points to the earth\n        # so the bev yaw angle needs to be reversed\n        bev[:, -1] = -bev[:, -1]\n        return bev\n\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle or rotation\n        matrix.\n\n        Args:\n            angle (float | torch.Tensor | np.ndarray):\n                Rotation angle or rotation matrix.\n            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):\n                Points to rotate. 
Defaults to None.\n\n        Returns:\n            tuple or None: When ``points`` is None, the function returns\n                None, otherwise it returns the rotated points and the\n                rotation matrix ``rot_mat_T``.\n        \"\"\"\n        if not isinstance(angle, torch.Tensor):\n            angle = self.tensor.new_tensor(angle)\n\n        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \\\n            f'invalid rotation angle shape {angle.shape}'\n\n        if angle.numel() == 1:\n            self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(\n                self.tensor[:, 0:3],\n                angle,\n                axis=self.YAW_AXIS,\n                return_mat=True)\n        else:\n            rot_mat_T = angle\n            rot_sin = rot_mat_T[2, 0]\n            rot_cos = rot_mat_T[0, 0]\n            angle = np.arctan2(rot_sin, rot_cos)\n            self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T\n\n        self.tensor[:, 6] += angle\n\n        if points is not None:\n            if isinstance(points, torch.Tensor):\n                points[:, :3] = points[:, :3] @ rot_mat_T\n            elif isinstance(points, np.ndarray):\n                rot_mat_T = rot_mat_T.cpu().numpy()\n                points[:, :3] = np.dot(points[:, :3], rot_mat_T)\n            elif isinstance(points, BasePoints):\n                points.rotate(rot_mat_T)\n            else:\n                raise ValueError\n            return points, rot_mat_T\n\n    def flip(self, bev_direction='horizontal', points=None):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\n\n        In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):\n                Points to flip. Defaults to None.\n\n        Returns:\n            torch.Tensor, numpy.ndarray or None: Flipped points.\n        \"\"\"\n        assert bev_direction in ('horizontal', 'vertical')\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0::7] = -self.tensor[:, 0::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi\n        elif bev_direction == 'vertical':\n            self.tensor[:, 2::7] = -self.tensor[:, 2::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6]\n\n        if points is not None:\n            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))\n            if isinstance(points, (torch.Tensor, np.ndarray)):\n                if bev_direction == 'horizontal':\n                    points[:, 0] = -points[:, 0]\n                elif bev_direction == 'vertical':\n                    points[:, 2] = -points[:, 2]\n            elif isinstance(points, BasePoints):\n                points.flip(bev_direction)\n            return points\n\n    @classmethod\n    def height_overlaps(cls, boxes1, boxes2, mode='iou'):\n        \"\"\"Calculate height overlaps of two boxes.\n\n        This function calculates the height overlaps between ``boxes1`` and\n        ``boxes2``, where ``boxes1`` and ``boxes2`` should be in the same type.\n\n        Args:\n            boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 contain N boxes.\n            boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 contain M boxes.\n            mode (str, optional): Mode of iou calculation. 
Defaults to 'iou'.\n\n        Returns:\n            torch.Tensor: Calculated iou of boxes' heights.\n        \"\"\"\n        assert isinstance(boxes1, CameraInstance3DBoxes)\n        assert isinstance(boxes2, CameraInstance3DBoxes)\n\n        boxes1_top_height = boxes1.top_height.view(-1, 1)\n        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)\n        boxes2_top_height = boxes2.top_height.view(1, -1)\n        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)\n\n        # positive direction of the gravity axis\n        # in cam coord system points to the earth\n        heighest_of_bottom = torch.min(boxes1_bottom_height,\n                                       boxes2_bottom_height)\n        lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)\n        overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)\n        return overlaps_h\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from ``src`` coordinates to ``dst`` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`:\n                The converted box of the same type in the ``dst`` mode.\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n        return Box3DMode.convert(\n            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)\n\n    def points_in_boxes_part(self, points, boxes_override=None):\n        \"\"\"Find the box in which each point is.\n\n        Args:\n            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),\n                3 dimensions are (x, y, z) in LiDAR or depth coordinate.\n            boxes_override (torch.Tensor, optional): Boxes to override\n                `self.tensor `. Defaults to None.\n\n        Returns:\n            torch.Tensor: The index of the box in which\n                each point is, in shape (M, ). Default value is -1\n                (if the point is not enclosed by any box).\n        \"\"\"\n        from .coord_3d_mode import Coord3DMode\n\n        points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,\n                                           Coord3DMode.LIDAR)\n        if boxes_override is not None:\n            boxes_lidar = boxes_override\n        else:\n            boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,\n                                              Coord3DMode.LIDAR)\n\n        box_idx = super().points_in_boxes_part(points_lidar, boxes_lidar)\n        return box_idx\n\n    def points_in_boxes_all(self, points, boxes_override=None):\n        \"\"\"Find all boxes in which each point is.\n\n        Args:\n            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),\n                3 dimensions are (x, y, z) in LiDAR or depth coordinate.\n            boxes_override (torch.Tensor, optional): Boxes to override\n                `self.tensor `. 
Defaults to None.\n\n        Returns:\n            torch.Tensor: A tensor indicating whether each point is in a box,\n                in shape (M, T), where T is the number of boxes.\n        \"\"\"\n        from .coord_3d_mode import Coord3DMode\n\n        points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,\n                                           Coord3DMode.LIDAR)\n        if boxes_override is not None:\n            boxes_lidar = boxes_override\n        else:\n            boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,\n                                              Coord3DMode.LIDAR)\n\n        box_idx = super().points_in_boxes_all(points_lidar, boxes_lidar)\n        return box_idx\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/coord_3d_mode.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom enum import IntEnum, unique\n\nimport numpy as np\nimport torch\n\nfrom ...points import BasePoints, CameraPoints, DepthPoints, LiDARPoints\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .box_3d_mode import Box3DMode\n\n\n@unique\nclass Coord3DMode(IntEnum):\n    r\"\"\"Enum of different ways to represent a box\n        and point cloud.\n\n    Coordinates in LiDAR:\n\n    .. code-block:: none\n\n                    up z\n                       ^   x front\n                       |  /\n                       | /\n        left y <------ 0\n\n    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n\n    Coordinates in camera:\n\n    .. code-block:: none\n\n                z front\n               /\n              /\n             0 ------> x right\n             |\n             |\n             v\n        down y\n\n    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],\n    and the yaw is around the y axis, thus the rotation axis=1.\n\n    Coordinates in Depth mode:\n\n    .. code-block:: none\n\n        up z\n           ^   y front\n           |  /\n           | /\n           0 ------> x right\n\n    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    \"\"\"\n\n    LIDAR = 0\n    CAM = 1\n    DEPTH = 2\n\n    @staticmethod\n    def convert(input, src, dst, rt_mat=None, with_yaw=True, is_point=True):\n        \"\"\"Convert boxes or points from `src` mode to `dst` mode.\n\n        Args:\n            input (tuple | list | np.ndarray | torch.Tensor |\n                :obj:`BaseInstance3DBoxes` | :obj:`BasePoints`):\n                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.\n            src (:obj:`Box3DMode` | :obj:`Coord3DMode`): The source mode.\n            dst (:obj:`Box3DMode` | :obj:`Coord3DMode`): The target mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n            with_yaw (bool): If `box` is an instance of\n                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.\n                Defaults to True.\n            is_point (bool): If `input` is neither an instance of\n                :obj:`BaseInstance3DBoxes` nor an instance of\n                :obj:`BasePoints`, whether or not it is point data.\n                Defaults to True.\n\n        Returns:\n            (tuple | list | np.ndarray | torch.Tensor |\n                :obj:`BaseInstance3DBoxes` | :obj:`BasePoints`):\n                The converted box of the same type.\n        \"\"\"\n        if isinstance(input, BaseInstance3DBoxes):\n            return Coord3DMode.convert_box(\n                input, src, dst, rt_mat=rt_mat, with_yaw=with_yaw)\n        elif isinstance(input, BasePoints):\n            return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)\n        elif isinstance(input, (tuple, list, np.ndarray, torch.Tensor)):\n            if is_point:\n                return Coord3DMode.convert_point(\n                    input, src, dst, rt_mat=rt_mat)\n            else:\n                return Coord3DMode.convert_box(\n                    input, src, dst, rt_mat=rt_mat, with_yaw=with_yaw)\n        else:\n            raise NotImplementedError\n\n    @staticmethod\n    def convert_box(box, src, dst, rt_mat=None, with_yaw=True):\n        \"\"\"Convert boxes from `src` mode to `dst` mode.\n\n        Args:\n            box (tuple | list | np.ndarray |\n                torch.Tensor | :obj:`BaseInstance3DBoxes`):\n                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.\n            src (:obj:`Box3DMode`): The src Box mode.\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n            with_yaw (bool): If `box` is an instance of\n                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.\n                Defaults to True.\n\n        Returns:\n            (tuple | list | np.ndarray | torch.Tensor |\n                :obj:`BaseInstance3DBoxes`):\n                The converted box of the same type.\n        \"\"\"\n        return Box3DMode.convert(box, src, dst, rt_mat=rt_mat)\n\n    @staticmethod\n    def convert_point(point, src, dst, rt_mat=None):\n        \"\"\"Convert points from `src` mode to `dst` mode.\n\n        Args:\n            point (tuple | list | np.ndarray |\n                torch.Tensor | :obj:`BasePoints`):\n                Can be a k-tuple, k-list or an Nxk array/tensor.\n            src (:obj:`CoordMode`): The src Point mode.\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n\n        Returns:\n            (tuple | list | np.ndarray | torch.Tensor | :obj:`BasePoints`):\n                The converted point of the same type.\n        \"\"\"\n        if src == dst:\n            return point\n\n        is_numpy = isinstance(point, np.ndarray)\n        is_InstancePoints = isinstance(point, BasePoints)\n        single_point = isinstance(point, (list, tuple))\n        if single_point:\n            assert len(point) >= 3, (\n                'CoordMode.convert takes either a k-tuple/list or '\n                'an Nxk array/tensor, where k >= 3')\n            arr = torch.tensor(point)[None, :]\n        else:\n            # avoid modifying the input point\n            if is_numpy:\n                arr = torch.from_numpy(np.asarray(point)).clone()\n            elif is_InstancePoints:\n                arr = point.tensor.clone()\n            else:\n                arr = point.clone()\n\n        # convert point from `src` mode to `dst` mode.\n        if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])\n        elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])\n        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])\n        elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])\n        elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])\n        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])\n        else:\n            raise NotImplementedError(\n                f'Conversion from Coord3DMode {src} to {dst} '\n                'is not supported yet')\n\n        if not isinstance(rt_mat, torch.Tensor):\n            rt_mat = arr.new_tensor(rt_mat)\n        if rt_mat.size(1) == 4:\n            extended_xyz = torch.cat(\n                [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)\n            xyz = extended_xyz @ rt_mat.t()\n        else:\n            xyz = arr[..., :3] @ rt_mat.t()\n\n        remains = arr[..., 3:]\n        arr = torch.cat([xyz[..., :3], remains], dim=-1)\n\n        # convert arr to the original type\n        original_type = type(point)\n        if single_point:\n            return original_type(arr.flatten().tolist())\n        if is_numpy:\n            return arr.numpy()\n        elif is_InstancePoints:\n            if dst == Coord3DMode.CAM:\n                target_type = CameraPoints\n            elif dst == Coord3DMode.LIDAR:\n                target_type = LiDARPoints\n            elif dst == Coord3DMode.DEPTH:\n                target_type = DepthPoints\n            else:\n                raise NotImplementedError(\n                    f'Conversion to {dst} through {original_type}'\n                    ' is not supported yet')\n            return target_type(\n                arr,\n                points_dim=arr.size(-1),\n                attribute_dims=point.attribute_dims)\n        else:\n            return arr\n"
  },
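A small illustrative round trip through `Coord3DMode.convert_point` (sketch only; the module path is assumed from the file above). With the default matrices, a LiDAR point (x, y, z) maps to camera coordinates (-y, -z, x):

```python
# Illustrative check of the default LiDAR -> camera point mapping documented above.
import torch
from mmdet3d.core.bbox.structures.coord_3d_mode import Coord3DMode

pts_lidar = torch.tensor([[1.0, 2.0, 3.0]])
pts_cam = Coord3DMode.convert_point(pts_lidar, Coord3DMode.LIDAR, Coord3DMode.CAM)
print(pts_cam)  # tensor([[-2., -3.,  1.]])

# Converting back with the default CAM -> LiDAR matrix recovers the original point.
pts_back = Coord3DMode.convert_point(pts_cam, Coord3DMode.CAM, Coord3DMode.LIDAR)
assert torch.allclose(pts_back, pts_lidar)
```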
  {
    "path": "mmdet3d/core/bbox/structures/custom_box.py",
    "content": "# nuScenes dev-kit.\n# Code written by Oscar Beijbom, 2018.\n\nimport copy\nimport os.path as osp\nimport struct\nfrom abc import ABC, abstractmethod\nfrom functools import reduce\nfrom typing import Tuple, List, Dict\n\nimport cv2\nimport numpy as np\nfrom matplotlib.axes import Axes\nfrom pyquaternion import Quaternion\n\nfrom nuscenes.lidarseg.lidarseg_utils import colormap_to_colors, create_lidarseg_legend\nfrom nuscenes.utils.data_io import load_bin_file\nfrom nuscenes.utils.geometry_utils import view_points, transform_matrix\nfrom nuscenes.utils.data_classes import Box\n\n\n\nclass CustomBox(Box):\n    \"\"\" Simple data class representing a 3d box including, label, score and velocity. \"\"\"\n\n    def __init__(self,\n                 center: List[float],\n                 size: List[float],\n                 orientation: Quaternion,\n                 label: int = np.nan,\n                 score: float = np.nan,\n                 velocity: Tuple = (np.nan, np.nan, np.nan),\n                 name: str = None,\n                 token: str = None):\n        \"\"\"\n        :param center: Center of box given as x, y, z.\n        :param size: Size of box in width, length, height.\n        :param orientation: Box orientation.\n        :param label: Integer label, optional.\n        :param score: Classification score, optional.\n        :param velocity: Box velocity in x, y, z direction.\n        :param name: Box name, optional. Can be used e.g. for denote category name.\n        :param token: Unique string identifier from DB.\n        \"\"\"\n        # assert not np.any(np.isnan(center))\n        # assert not np.any(np.isnan(size))\n        # assert len(center) == 3\n        # assert len(size) == 3\n        # assert type(orientation) == Quaternion\n\n        # self.center = np.array(center)\n        # self.wlh = np.array(size)\n        # self.orientation = orientation\n        # self.label = int(label) if not np.isnan(label) else label\n        # self.score = float(score) if not np.isnan(score) else score\n        # self.velocity = np.array(velocity)\n        # self.name = name\n        # self.token = token\n\n        super().__init__(\n            center=center,\n            size=size,\n            orientation=orientation,\n            label=label,\n            score = score,\n            velocity = velocity,\n            name = name,\n            token = token\n        )\n\n    def render(self,\n               axis: Axes,\n               view: np.ndarray = np.eye(3),\n               normalize: bool = False,\n               colors: Tuple = ('b', 'r', 'k'),\n               linewidth: float = 2) -> None:\n        \"\"\"\n        Renders the box in the provided Matplotlib axis.\n        :param axis: Axis onto which the box should be drawn.\n        :param view: <np.array: 3, 3>. Define a projection in needed (e.g. for drawing projection in an image).\n        :param normalize: Whether to normalize the remaining coordinate.\n        :param colors: (<Matplotlib.colors>: 3). 
Valid Matplotlib colors (<str> or normalized RGB tuple) for front,\n            back and sides.\n        :param linewidth: Width in pixel of the box sides.\n        \"\"\"\n        corners = view_points(self.corners(), view, normalize=normalize)[:2, :]\n\n        def draw_rect(selected_corners, color):\n            prev = selected_corners[-1]\n            for corner in selected_corners:\n                axis.plot([prev[0], corner[0]], [prev[1], corner[1]], color=color, linewidth=linewidth)\n                prev = corner\n\n        # Draw the sides\n        for i in range(4):\n            axis.plot([corners.T[i][0], corners.T[i + 4][0]],\n                      [corners.T[i][1], corners.T[i + 4][1]],\n                      color=colors[2], linewidth=linewidth)\n\n        # Draw front (first 4 corners) and rear (last 4 corners) rectangles(3d)/lines(2d)\n        draw_rect(corners.T[:4], colors[0])\n        draw_rect(corners.T[4:], colors[1])\n\n        # Draw line indicating the front\n        center_bottom_forward = np.mean(corners.T[2:4], axis=0)\n        center_bottom = np.mean(corners.T[[2, 3, 7, 6]], axis=0)\n        axis.plot([center_bottom[0], center_bottom_forward[0]],\n                  [center_bottom[1], center_bottom_forward[1]],\n                  color=colors[0], linewidth=linewidth)\n        # from IPython import embed\n        # embed()\n        # exit()  \n        #   \n        # In [1]: corners.T\n        # Out[1]: \n        # array([[135.10084664, 217.64073984],\n        #     [145.04250652, 217.12554824],\n        #     [145.04250652, 217.12554824],\n        #     [135.10084664, 217.64073984],\n        #     [134.95749348, 214.87445176],\n        #     [144.89915336, 214.35926016],\n        #     [144.89915336, 214.35926016],\n        #     [134.95749348, 214.87445176]])  \n        x_coords, y_coords = zip(*corners.T[[0,1,6,7]])\n        # axis.fill(x_coords, y_coords, colors[0], alpha=0.8)\n\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/depth_box3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core.points import BasePoints\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .utils import rotation_3d_in_axis\n\n\nclass DepthInstance3DBoxes(BaseInstance3DBoxes):\n    \"\"\"3D boxes of instances in Depth coordinates.\n\n    Coordinates in Depth:\n\n    .. code-block:: none\n\n                    up z    y front (yaw=-0.5*pi)\n                       ^   ^\n                       |  /\n                       | /\n                       0 ------> x right (yaw=0)\n\n    The relative coordinate of bottom center in a Depth box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    The yaw is 0 at the positive direction of x axis, and decreases from\n    the positive direction of x to the positive direction of y.\n    Also note that rotation of DepthInstance3DBoxes is counterclockwise,\n    which is reverse to the definition of the yaw angle (clockwise).\n\n    A refactor is ongoing to make the three coordinate systems\n    easier to understand and convert between each other.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicates the dimension of a box\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n    YAW_AXIS = 2\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box in shape (N, 3).\"\"\"\n        bottom_center = self.bottom_center\n        gravity_center = torch.zeros_like(bottom_center)\n        gravity_center[:, :2] = bottom_center[:, :2]\n        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5\n        return gravity_center\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: Coordinates of corners of all the boxes\n        in shape (N, 8, 3).\n\n        Convert the boxes to corners in clockwise order, in form of\n        ``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``\n\n        .. code-block:: none\n\n                                           up z\n                            front y           ^\n                                 /            |\n                                /             |\n                  (x0, y1, z1) + -----------  + (x1, y1, z1)\n                              /|            / |\n                             / |           /  |\n               (x0, y0, z1) + ----------- +   + (x1, y1, z0)\n                            |  /      .   
|  /\n                            | / origin    | /\n               (x0, y0, z0) + ----------- + --------> right x\n                                          (x1, y0, z0)\n        \"\"\"\n        if self.tensor.numel() == 0:\n            return torch.empty([0, 8, 3], device=self.tensor.device)\n\n        dims = self.dims\n        corners_norm = torch.from_numpy(\n            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(\n                device=dims.device, dtype=dims.dtype)\n\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n        # use relative origin (0.5, 0.5, 0)\n        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])\n        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])\n\n        # rotate around z axis\n        corners = rotation_3d_in_axis(\n            corners, self.tensor[:, 6], axis=self.YAW_AXIS)\n        corners += self.tensor[:, :3].view(-1, 1, 3)\n        return corners\n\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle or rotation\n        matrix.\n\n        Args:\n            angle (float | torch.Tensor | np.ndarray):\n                Rotation angle or rotation matrix.\n            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):\n                Points to rotate. Defaults to None.\n\n        Returns:\n            tuple or None: When ``points`` is None, the function returns\n                None, otherwise it returns the rotated points and the\n                rotation matrix ``rot_mat_T``.\n        \"\"\"\n        if not isinstance(angle, torch.Tensor):\n            angle = self.tensor.new_tensor(angle)\n\n        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \\\n            f'invalid rotation angle shape {angle.shape}'\n\n        if angle.numel() == 1:\n            self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(\n                self.tensor[:, 0:3],\n                angle,\n                axis=self.YAW_AXIS,\n                return_mat=True)\n        else:\n            rot_mat_T = angle\n            rot_sin = rot_mat_T[0, 1]\n            rot_cos = rot_mat_T[0, 0]\n            angle = np.arctan2(rot_sin, rot_cos)\n            self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T\n\n        if self.with_yaw:\n            self.tensor[:, 6] += angle\n        else:\n            # for axis-aligned boxes, we take the new\n            # enclosing axis-aligned boxes after rotation\n            corners_rot = self.corners @ rot_mat_T\n            new_x_size = corners_rot[..., 0].max(\n                dim=1, keepdim=True)[0] - corners_rot[..., 0].min(\n                    dim=1, keepdim=True)[0]\n            new_y_size = corners_rot[..., 1].max(\n                dim=1, keepdim=True)[0] - corners_rot[..., 1].min(\n                    dim=1, keepdim=True)[0]\n            self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)\n\n        if points is not None:\n            if isinstance(points, torch.Tensor):\n                points[:, :3] = points[:, :3] @ rot_mat_T\n            elif isinstance(points, np.ndarray):\n                rot_mat_T = rot_mat_T.cpu().numpy()\n                points[:, :3] = np.dot(points[:, :3], rot_mat_T)\n            elif isinstance(points, BasePoints):\n                points.rotate(rot_mat_T)\n            else:\n                raise ValueError\n            return points, rot_mat_T\n\n    def flip(self, bev_direction='horizontal', points=None):\n        \"\"\"Flip the boxes 
in BEV along given BEV direction.\n\n        In Depth coordinates, it flips x (horizontal) or y (vertical) axis.\n\n        Args:\n            bev_direction (str, optional): Flip direction\n                (horizontal or vertical). Defaults to 'horizontal'.\n            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):\n                Points to flip. Defaults to None.\n\n        Returns:\n            torch.Tensor, numpy.ndarray or None: Flipped points.\n        \"\"\"\n        assert bev_direction in ('horizontal', 'vertical')\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0::7] = -self.tensor[:, 0::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi\n        elif bev_direction == 'vertical':\n            self.tensor[:, 1::7] = -self.tensor[:, 1::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6]\n\n        if points is not None:\n            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))\n            if isinstance(points, (torch.Tensor, np.ndarray)):\n                if bev_direction == 'horizontal':\n                    points[:, 0] = -points[:, 0]\n                elif bev_direction == 'vertical':\n                    points[:, 1] = -points[:, 1]\n            elif isinstance(points, BasePoints):\n                points.flip(bev_direction)\n            return points\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from ``src`` coordinates to ``dst`` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n\n        Returns:\n            :obj:`DepthInstance3DBoxes`:\n                The converted box of the same type in the ``dst`` mode.\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n        return Box3DMode.convert(\n            box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)\n\n    def enlarged_box(self, extra_width):\n        \"\"\"Enlarge the length, width and height boxes.\n\n        Args:\n            extra_width (float | torch.Tensor): Extra width to enlarge the box.\n\n        Returns:\n            :obj:`DepthInstance3DBoxes`: Enlarged boxes.\n        \"\"\"\n        enlarged_boxes = self.tensor.clone()\n        enlarged_boxes[:, 3:6] += extra_width * 2\n        # bottom center z minus extra_width\n        enlarged_boxes[:, 2] -= extra_width\n        return self.new_box(enlarged_boxes)\n\n    def get_surface_line_center(self):\n        \"\"\"Compute surface and line center of bounding boxes.\n\n        Returns:\n            torch.Tensor: Surface and line center of bounding boxes.\n        \"\"\"\n        obj_size = self.dims\n        center = self.gravity_center.view(-1, 1, 3)\n        batch_size = center.shape[0]\n\n        rot_sin = torch.sin(-self.yaw)\n        rot_cos = torch.cos(-self.yaw)\n        rot_mat_T = self.yaw.new_zeros(tuple(list(self.yaw.shape) + [3, 3]))\n        rot_mat_T[..., 0, 0] = rot_cos\n        rot_mat_T[..., 0, 1] = -rot_sin\n        rot_mat_T[..., 1, 0] = rot_sin\n        rot_mat_T[..., 1, 1] = rot_cos\n        rot_mat_T[..., 2, 2] = 1\n\n        # Get the object surface center\n        offset = obj_size.new_tensor([[0, 0, 1], [0, 0, -1], [0, 1, 0],\n                                      [0, -1, 0], [1, 0, 0], [-1, 0, 0]])\n        offset = offset.view(1, 6, 3) / 2\n        surface_3d = (offset *\n                      obj_size.view(batch_size, 1, 3).repeat(1, 6, 1)).reshape(\n                          -1, 3)\n\n        # Get the object line center\n        offset = obj_size.new_tensor([[1, 0, 1], [-1, 0, 1], [0, 1, 1],\n                                      [0, -1, 1], [1, 0, -1], [-1, 0, -1],\n                                      [0, 1, -1], [0, -1, -1], [1, 1, 0],\n                                      [1, -1, 0], [-1, 1, 0], [-1, -1, 0]])\n        offset = offset.view(1, 12, 3) / 2\n\n        line_3d = (offset *\n                   obj_size.view(batch_size, 1, 3).repeat(1, 12, 1)).reshape(\n                       -1, 3)\n\n        surface_rot = rot_mat_T.repeat(6, 1, 1)\n        surface_3d = torch.matmul(surface_3d.unsqueeze(-2),\n                                  surface_rot).squeeze(-2)\n        surface_center = center.repeat(1, 6, 1).reshape(-1, 3) + surface_3d\n\n        line_rot = rot_mat_T.repeat(12, 1, 1)\n        line_3d = torch.matmul(line_3d.unsqueeze(-2), line_rot).squeeze(-2)\n        line_center = center.repeat(1, 12, 1).reshape(-1, 3) + line_3d\n\n        return surface_center, line_center\n"
  },
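For reference, a minimal sketch of the Depth-box properties documented above (import path assumed): the gravity center sits half a box height above the bottom center.

```python
# Illustrative example of DepthInstance3DBoxes properties.
import torch
from mmdet3d.core.bbox.structures.depth_box3d import DepthInstance3DBoxes

# (x, y, z, x_size, y_size, z_size, yaw) with the bottom center at z = 0
boxes = DepthInstance3DBoxes(torch.tensor([[1.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0]]))
print(boxes.gravity_center)  # tensor([[1.0000, 2.0000, 0.5000]])
print(boxes.corners.shape)   # torch.Size([1, 8, 3])
```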
  {
    "path": "mmdet3d/core/bbox/structures/lidar_box3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core.points import BasePoints\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .utils import rotation_3d_in_axis\n\n\nclass LiDARInstance3DBoxes(BaseInstance3DBoxes):\n    \"\"\"3D boxes of instances in LIDAR coordinates.\n\n    Coordinates in LiDAR:\n\n    .. code-block:: none\n\n                                up z    x front (yaw=0)\n                                   ^   ^\n                                   |  /\n                                   | /\n       (yaw=0.5*pi) left y <------ 0\n\n    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    The yaw is 0 at the positive direction of x axis, and increases from\n    the positive direction of x to the positive direction of y.\n\n    A refactor is ongoing to make the three coordinate systems\n    easier to understand and convert between each other.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicating the dimension of a box.\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n    YAW_AXIS = 2\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box in shape (N, 3).\"\"\"\n        bottom_center = self.bottom_center\n        gravity_center = torch.zeros_like(bottom_center)\n        gravity_center[:, :2] = bottom_center[:, :2]\n        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5\n        return gravity_center\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: Coordinates of corners of all the boxes\n        in shape (N, 8, 3).\n\n        Convert the boxes to corners in clockwise order, in form of\n        ``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``\n\n        .. code-block:: none\n\n                                           up z\n                            front x           ^\n                                 /            |\n                                /             |\n                  (x1, y0, z1) + -----------  + (x1, y1, z1)\n                              /|            / |\n                             / |           /  |\n               (x0, y0, z1) + ----------- +   + (x1, y1, z0)\n                            |  /      .   
|  /\n                            | / origin    | /\n            left y<-------- + ----------- + (x0, y1, z0)\n                (x0, y0, z0)\n        \"\"\"\n        if self.tensor.numel() == 0:\n            return torch.empty([0, 8, 3], device=self.tensor.device)\n\n        dims = self.dims\n        corners_norm = torch.from_numpy(\n            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(\n                device=dims.device, dtype=dims.dtype)\n\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n        # use relative origin [0.5, 0.5, 0]\n        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])\n        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])\n\n        # rotate around z axis\n        corners = rotation_3d_in_axis(\n            corners, self.tensor[:, 6], axis=self.YAW_AXIS)\n        corners += self.tensor[:, :3].view(-1, 1, 3)\n        return corners\n\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle or rotation\n        matrix.\n\n        Args:\n            angles (float | torch.Tensor | np.ndarray):\n                Rotation angle or rotation matrix.\n            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):\n                Points to rotate. Defaults to None.\n\n        Returns:\n            tuple or None: When ``points`` is None, the function returns\n                None, otherwise it returns the rotated points and the\n                rotation matrix ``rot_mat_T``.\n        \"\"\"\n        if not isinstance(angle, torch.Tensor):\n            angle = self.tensor.new_tensor(angle)\n\n        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \\\n            f'invalid rotation angle shape {angle.shape}'\n\n        if angle.numel() == 1:\n            self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(\n                self.tensor[:, 0:3],\n                angle,\n                axis=self.YAW_AXIS,\n                return_mat=True)\n        else:\n            rot_mat_T = angle\n            rot_sin = rot_mat_T[0, 1]\n            rot_cos = rot_mat_T[0, 0]\n            angle = np.arctan2(rot_sin, rot_cos)\n            self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T\n\n        self.tensor[:, 6] += angle\n\n        if self.tensor.shape[1] == 9:\n            # rotate velo vector\n            self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]\n\n        if points is not None:\n            if isinstance(points, torch.Tensor):\n                points[:, :3] = points[:, :3] @ rot_mat_T\n            elif isinstance(points, np.ndarray):\n                rot_mat_T = rot_mat_T.cpu().numpy()\n                points[:, :3] = np.dot(points[:, :3], rot_mat_T)\n            elif isinstance(points, BasePoints):\n                points.rotate(rot_mat_T)\n            else:\n                raise ValueError\n            return points, rot_mat_T\n\n    def flip(self, bev_direction='horizontal', points=None):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\n\n        In LIDAR coordinates, it flips the y (horizontal) or x (vertical) axis.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):\n                Points to flip. 
Defaults to None.\n\n        Returns:\n            torch.Tensor, numpy.ndarray or None: Flipped points.\n        \"\"\"\n        assert bev_direction in ('horizontal', 'vertical')\n        if bev_direction == 'horizontal':\n            self.tensor[:, 1::7] = -self.tensor[:, 1::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 0::7] = -self.tensor[:, 0::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi\n\n        if points is not None:\n            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))\n            if isinstance(points, (torch.Tensor, np.ndarray)):\n                if bev_direction == 'horizontal':\n                    points[:, 1] = -points[:, 1]\n                elif bev_direction == 'vertical':\n                    points[:, 0] = -points[:, 0]\n            elif isinstance(points, BasePoints):\n                points.flip(bev_direction)\n            return points\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`Box3DMode`): the target Box mode\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from ``src`` coordinates to ``dst`` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`:\n                The converted box of the same type in the ``dst`` mode.\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n        return Box3DMode.convert(\n            box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)\n\n    def enlarged_box(self, extra_width):\n        \"\"\"Enlarge the length, width and height boxes.\n\n        Args:\n            extra_width (float | torch.Tensor): Extra width to enlarge the box.\n\n        Returns:\n            :obj:`LiDARInstance3DBoxes`: Enlarged boxes.\n        \"\"\"\n        enlarged_boxes = self.tensor.clone()\n        enlarged_boxes[:, 3:6] += extra_width * 2\n        # bottom center z minus extra_width\n        enlarged_boxes[:, 2] -= extra_width\n        return self.new_box(enlarged_boxes)\n"
  },
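A short illustrative example of the LiDAR-box BEV flip described above (import path assumed): a 'horizontal' flip negates y and the yaw while leaving the other fields untouched.

```python
# Illustrative sketch of LiDARInstance3DBoxes.flip('horizontal').
import torch
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(torch.tensor([[10.0, 3.0, -1.0, 4.0, 2.0, 1.5, 0.3]]))
boxes.flip('horizontal')
print(boxes.tensor)  # y -> -3.0 and yaw -> -0.3; all other values unchanged
```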
  {
    "path": "mmdet3d/core/bbox/structures/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom logging import warning\n\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core.utils import array_converter\n\n\n@array_converter(apply_to=('val', ))\ndef limit_period(val, offset=0.5, period=np.pi):\n    \"\"\"Limit the value into a period for periodic function.\n\n    Args:\n        val (torch.Tensor | np.ndarray): The value to be converted.\n        offset (float, optional): Offset to set the value range.\n            Defaults to 0.5.\n        period ([type], optional): Period of the value. Defaults to np.pi.\n\n    Returns:\n        (torch.Tensor | np.ndarray): Value in the range of\n            [-offset * period, (1-offset) * period]\n    \"\"\"\n    limited_val = val - torch.floor(val / period + offset) * period\n    return limited_val\n\n\n@array_converter(apply_to=('points', 'angles'))\ndef rotation_3d_in_axis(points,\n                        angles,\n                        axis=0,\n                        return_mat=False,\n                        clockwise=False):\n    \"\"\"Rotate points by angles according to axis.\n\n    Args:\n        points (np.ndarray | torch.Tensor | list | tuple ):\n            Points of shape (N, M, 3).\n        angles (np.ndarray | torch.Tensor | list | tuple | float):\n            Vector of angles in shape (N,)\n        axis (int, optional): The axis to be rotated. Defaults to 0.\n        return_mat: Whether or not return the rotation matrix (transposed).\n            Defaults to False.\n        clockwise: Whether the rotation is clockwise. Defaults to False.\n\n    Raises:\n        ValueError: when the axis is not in range [0, 1, 2], it will\n            raise value error.\n\n    Returns:\n        (torch.Tensor | np.ndarray): Rotated points in shape (N, M, 3).\n    \"\"\"\n    batch_free = len(points.shape) == 2\n    if batch_free:\n        points = points[None]\n\n    if isinstance(angles, float) or len(angles.shape) == 0:\n        angles = torch.full(points.shape[:1], angles)\n\n    assert len(points.shape) == 3 and len(angles.shape) == 1 \\\n        and points.shape[0] == angles.shape[0], f'Incorrect shape of points ' \\\n        f'angles: {points.shape}, {angles.shape}'\n\n    assert points.shape[-1] in [2, 3], \\\n        f'Points size should be 2 or 3 instead of {points.shape[-1]}'\n\n    rot_sin = torch.sin(angles)\n    rot_cos = torch.cos(angles)\n    ones = torch.ones_like(rot_cos)\n    zeros = torch.zeros_like(rot_cos)\n\n    if points.shape[-1] == 3:\n        if axis == 1 or axis == -2:\n            rot_mat_T = torch.stack([\n                torch.stack([rot_cos, zeros, -rot_sin]),\n                torch.stack([zeros, ones, zeros]),\n                torch.stack([rot_sin, zeros, rot_cos])\n            ])\n        elif axis == 2 or axis == -1:\n            rot_mat_T = torch.stack([\n                torch.stack([rot_cos, rot_sin, zeros]),\n                torch.stack([-rot_sin, rot_cos, zeros]),\n                torch.stack([zeros, zeros, ones])\n            ])\n        elif axis == 0 or axis == -3:\n            rot_mat_T = torch.stack([\n                torch.stack([ones, zeros, zeros]),\n                torch.stack([zeros, rot_cos, rot_sin]),\n                torch.stack([zeros, -rot_sin, rot_cos])\n            ])\n        else:\n            raise ValueError(f'axis should in range '\n                             f'[-3, -2, -1, 0, 1, 2], got {axis}')\n    else:\n        rot_mat_T = torch.stack([\n            torch.stack([rot_cos, rot_sin]),\n            
torch.stack([-rot_sin, rot_cos])\n        ])\n\n    if clockwise:\n        rot_mat_T = rot_mat_T.transpose(0, 1)\n\n    if points.shape[0] == 0:\n        points_new = points\n    else:\n        points_new = torch.einsum('aij,jka->aik', points, rot_mat_T)\n\n    if batch_free:\n        points_new = points_new.squeeze(0)\n\n    if return_mat:\n        rot_mat_T = torch.einsum('jka->ajk', rot_mat_T)\n        if batch_free:\n            rot_mat_T = rot_mat_T.squeeze(0)\n        return points_new, rot_mat_T\n    else:\n        return points_new\n\n\n@array_converter(apply_to=('boxes_xywhr', ))\ndef xywhr2xyxyr(boxes_xywhr):\n    \"\"\"Convert a rotated boxes in XYWHR format to XYXYR format.\n\n    Args:\n        boxes_xywhr (torch.Tensor | np.ndarray): Rotated boxes in XYWHR format.\n\n    Returns:\n        (torch.Tensor | np.ndarray): Converted boxes in XYXYR format.\n    \"\"\"\n    boxes = torch.zeros_like(boxes_xywhr)\n    half_w = boxes_xywhr[..., 2] / 2\n    half_h = boxes_xywhr[..., 3] / 2\n\n    boxes[..., 0] = boxes_xywhr[..., 0] - half_w\n    boxes[..., 1] = boxes_xywhr[..., 1] - half_h\n    boxes[..., 2] = boxes_xywhr[..., 0] + half_w\n    boxes[..., 3] = boxes_xywhr[..., 1] + half_h\n    boxes[..., 4] = boxes_xywhr[..., 4]\n    return boxes\n\n\ndef get_box_type(box_type):\n    \"\"\"Get the type and mode of box structure.\n\n    Args:\n        box_type (str): The type of box structure.\n            The valid value are \"LiDAR\", \"Camera\", or \"Depth\".\n\n    Raises:\n        ValueError: A ValueError is raised when `box_type`\n            does not belong to the three valid types.\n\n    Returns:\n        tuple: Box type and box mode.\n    \"\"\"\n    from .box_3d_mode import (Box3DMode, CameraInstance3DBoxes,\n                              DepthInstance3DBoxes, LiDARInstance3DBoxes)\n    box_type_lower = box_type.lower()\n    if box_type_lower == 'lidar':\n        box_type_3d = LiDARInstance3DBoxes\n        box_mode_3d = Box3DMode.LIDAR\n    elif box_type_lower == 'camera':\n        box_type_3d = CameraInstance3DBoxes\n        box_mode_3d = Box3DMode.CAM\n    elif box_type_lower == 'depth':\n        box_type_3d = DepthInstance3DBoxes\n        box_mode_3d = Box3DMode.DEPTH\n    else:\n        raise ValueError('Only \"box_type\" of \"camera\", \"lidar\", \"depth\"'\n                         f' are supported, got {box_type}')\n\n    return box_type_3d, box_mode_3d\n\n\n@array_converter(apply_to=('points_3d', 'proj_mat'))\ndef points_cam2img(points_3d, proj_mat, with_depth=False):\n    \"\"\"Project points in camera coordinates to image coordinates.\n\n    Args:\n        points_3d (torch.Tensor | np.ndarray): Points in shape (N, 3)\n        proj_mat (torch.Tensor | np.ndarray):\n            Transformation matrix between coordinates.\n        with_depth (bool, optional): Whether to keep depth in the output.\n            Defaults to False.\n\n    Returns:\n        (torch.Tensor | np.ndarray): Points in image coordinates,\n            with shape [N, 2] if `with_depth=False`, else [N, 3].\n    \"\"\"\n    points_shape = list(points_3d.shape)\n    points_shape[-1] = 1\n\n    assert len(proj_mat.shape) == 2, 'The dimension of the projection'\\\n        f' matrix should be 2 instead of {len(proj_mat.shape)}.'\n    d1, d2 = proj_mat.shape[:2]\n    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (\n        d1 == 4 and d2 == 4), 'The shape of the projection matrix'\\\n        f' ({d1}*{d2}) is not supported.'\n    if d1 == 3:\n        proj_mat_expanded = torch.eye(\n            4, 
device=proj_mat.device, dtype=proj_mat.dtype)\n        proj_mat_expanded[:d1, :d2] = proj_mat\n        proj_mat = proj_mat_expanded\n\n    # the previous implementation used new_zeros; new_ones yields better results\n    points_4 = torch.cat([points_3d, points_3d.new_ones(points_shape)], dim=-1)\n\n    point_2d = points_4 @ proj_mat.T\n    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]\n\n    if with_depth:\n        point_2d_res = torch.cat([point_2d_res, point_2d[..., 2:3]], dim=-1)\n\n    return point_2d_res\n\n\n@array_converter(apply_to=('points', 'cam2img'))\ndef points_img2cam(points, cam2img):\n    \"\"\"Project points in image coordinates to camera coordinates.\n\n    Args:\n        points (torch.Tensor): 2.5D points in 2D images, [N, 3],\n            3 corresponds with x, y in the image and depth.\n        cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be\n            [3, 3], [3, 4] or [4, 4].\n\n    Returns:\n        torch.Tensor: points in 3D space. [N, 3],\n            3 corresponds with x, y, z in 3D space.\n    \"\"\"\n    assert cam2img.shape[0] <= 4\n    assert cam2img.shape[1] <= 4\n    assert points.shape[1] == 3\n\n    xys = points[:, :2]\n    depths = points[:, 2].view(-1, 1)\n    unnormed_xys = torch.cat([xys * depths, depths], dim=1)\n\n    pad_cam2img = torch.eye(4, dtype=xys.dtype, device=xys.device)\n    pad_cam2img[:cam2img.shape[0], :cam2img.shape[1]] = cam2img\n    inv_pad_cam2img = torch.inverse(pad_cam2img).transpose(0, 1)\n\n    # Do operation in homogeneous coordinates.\n    num_points = unnormed_xys.shape[0]\n    homo_xys = torch.cat([unnormed_xys, xys.new_ones((num_points, 1))], dim=1)\n    points3D = torch.mm(homo_xys, inv_pad_cam2img)[:, :3]\n\n    return points3D\n\n\ndef mono_cam_box2vis(cam_box):\n    \"\"\"This is a post-processing function on the bboxes from Mono-3D task. If\n    we want to perform projection visualization, we need to:\n\n        1. rotate the box along x-axis for np.pi / 2 (roll)\n        2. change orientation from local yaw to global yaw\n        3. convert yaw by (np.pi / 2 - yaw)\n\n    After applying this function, we can project and draw it on 2D images.\n\n    Args:\n        cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate\n            system before conversion. Could be gt bbox loaded from dataset\n            or network prediction output.\n\n    Returns:\n        :obj:`CameraInstance3DBoxes`: Box after conversion.\n    \"\"\"\n    # `warning` is `logging.warning` (imported at the top of this file), so it\n    # is called directly rather than through a non-existent `.warn` attribute.\n    warning('DeprecationWarning: The hack of yaw and dimension in the '\n            'monocular 3D detection on nuScenes has been removed. The '\n            'function mono_cam_box2vis will be deprecated.')\n    from . import CameraInstance3DBoxes\n    assert isinstance(cam_box, CameraInstance3DBoxes), \\\n        'input bbox should be CameraInstance3DBoxes!'\n\n    loc = cam_box.gravity_center\n    dim = cam_box.dims\n    yaw = cam_box.yaw\n    feats = cam_box.tensor[:, 7:]\n    # rotate along x-axis for np.pi / 2\n    # see also here: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/nuscenes_mono_dataset.py#L557  # noqa\n    dim[:, [1, 2]] = dim[:, [2, 1]]\n    # change local yaw to global yaw for visualization\n    # refer to https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/nuscenes_mono_dataset.py#L164-L166  # noqa\n    yaw += torch.atan2(loc[:, 0], loc[:, 2])\n    # convert yaw by (-yaw - np.pi / 2)\n    # this is because mono 3D box class such as `NuScenesBox` has a different\n    # definition of rotation from our `CameraInstance3DBoxes`\n    yaw = -yaw - np.pi / 2\n    cam_box = torch.cat([loc, dim, yaw[:, None], feats], dim=1)\n    cam_box = CameraInstance3DBoxes(\n        cam_box, box_dim=cam_box.shape[-1], origin=(0.5, 0.5, 0.5))\n\n    return cam_box\n\n\ndef get_proj_mat_by_coord_type(img_meta, coord_type):\n    \"\"\"Obtain the projection matrix that matches the given coordinate type.\n\n    Args:\n        img_meta (dict): Meta info.\n        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.\n            Can be case-insensitive.\n\n    Returns:\n        torch.Tensor: Transformation matrix.\n    \"\"\"\n    coord_type = coord_type.upper()\n    mapping = {'LIDAR': 'lidar2img', 'DEPTH': 'depth2img', 'CAMERA': 'cam2img'}\n    assert coord_type in mapping.keys()\n    return img_meta[mapping[coord_type]]\n\n\ndef yaw2local(yaw, loc):\n    \"\"\"Transform global yaw to local yaw (alpha in kitti) in camera\n    coordinates, ranges from -pi to pi.\n\n    Args:\n        yaw (torch.Tensor): A vector with global yaw of each box.\n            shape: (N, )\n        loc (torch.Tensor): gravity center of each box.\n            shape: (N, 3)\n\n    Returns:\n        torch.Tensor: local yaw (alpha in kitti).\n    \"\"\"\n    local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])\n    larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)\n    small_idx = (local_yaw < -np.pi).nonzero(as_tuple=False)\n    if len(larger_idx) != 0:\n        local_yaw[larger_idx] -= 2 * np.pi\n    if len(small_idx) != 0:\n        local_yaw[small_idx] += 2 * np.pi\n\n    return local_yaw\n"
  },
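An illustrative consistency check for the two projection helpers above (sketch only; the intrinsics are made-up pinhole values): lifting 2.5D image points with `points_img2cam` and re-projecting them with `points_cam2img` recovers the original pixel coordinates.

```python
# Round-trip sanity check for points_img2cam / points_cam2img.
import torch
from mmdet3d.core.bbox.structures.utils import points_cam2img, points_img2cam

cam2img = torch.tensor([[1000.0, 0.0, 800.0],
                        [0.0, 1000.0, 450.0],
                        [0.0, 0.0, 1.0]])
# (u, v, depth) points in the image plane
pts_img = torch.tensor([[820.0, 470.0, 10.0],
                        [500.0, 300.0, 25.0]])

pts_cam = points_img2cam(pts_img, cam2img)      # (N, 3) camera coordinates
pts_reproj = points_cam2img(pts_cam, cam2img)   # (N, 2) pixel coordinates
assert torch.allclose(pts_reproj, pts_img[:, :2], atol=1e-3)
```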
  {
    "path": "mmdet3d/core/bbox/transforms.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical):\n    \"\"\"Map bboxes from testing scale to original image scale.\n\n    Args:\n        bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back.\n        scale_factor (float): Scale factor.\n        flip_horizontal (bool): Whether to flip horizontally.\n        flip_vertical (bool): Whether to flip vertically.\n\n    Returns:\n        :obj:`BaseInstance3DBoxes`: Boxes mapped back.\n    \"\"\"\n    new_bboxes = bboxes.clone()\n    if flip_horizontal:\n        new_bboxes.flip('horizontal')\n    if flip_vertical:\n        new_bboxes.flip('vertical')\n    new_bboxes.scale(1 / scale_factor)\n\n    return new_bboxes\n\n\ndef bbox3d2roi(bbox_list):\n    \"\"\"Convert a list of bounding boxes to roi format.\n\n    Args:\n        bbox_list (list[torch.Tensor]): A list of bounding boxes\n            corresponding to a batch of images.\n\n    Returns:\n        torch.Tensor: Region of interests in shape (n, c), where\n            the channels are in order of [batch_ind, x, y ...].\n    \"\"\"\n    rois_list = []\n    for img_id, bboxes in enumerate(bbox_list):\n        if bboxes.size(0) > 0:\n            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)\n            rois = torch.cat([img_inds, bboxes], dim=-1)\n        else:\n            rois = torch.zeros_like(bboxes)\n        rois_list.append(rois)\n    rois = torch.cat(rois_list, 0)\n    return rois\n\n\ndef bbox3d2result(bboxes, scores, labels, attrs=None):\n    \"\"\"Convert detection results to a list of numpy arrays.\n\n    Args:\n        bboxes (torch.Tensor): Bounding boxes with shape (N, 5).\n        labels (torch.Tensor): Labels with shape (N, ).\n        scores (torch.Tensor): Scores with shape (N, ).\n        attrs (torch.Tensor, optional): Attributes with shape (N, ).\n            Defaults to None.\n\n    Returns:\n        dict[str, torch.Tensor]: Bounding box results in cpu mode.\n\n            - boxes_3d (torch.Tensor): 3D boxes.\n            - scores (torch.Tensor): Prediction scores.\n            - labels_3d (torch.Tensor): Box labels.\n            - attrs_3d (torch.Tensor, optional): Box attributes.\n    \"\"\"\n    result_dict = dict(\n        boxes_3d=bboxes.to('cpu'),\n        scores_3d=scores.cpu(),\n        labels_3d=labels.cpu())\n\n    if attrs is not None:\n        result_dict['attrs_3d'] = attrs.cpu()\n\n    return result_dict\n"
  },
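A minimal usage sketch for `bbox3d2roi` above (shapes are illustrative): boxes from each sample get a leading batch-index column before concatenation.

```python
# Illustrative use of bbox3d2roi.
import torch
from mmdet3d.core.bbox.transforms import bbox3d2roi

boxes_sample0 = torch.rand(2, 7)  # two boxes in the first sample
boxes_sample1 = torch.rand(3, 7)  # three boxes in the second sample
rois = bbox3d2roi([boxes_sample0, boxes_sample1])
print(rois.shape)  # torch.Size([5, 8])
print(rois[:, 0])  # tensor([0., 0., 1., 1., 1.])
```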
  {
    "path": "mmdet3d/core/bbox/util.py",
    "content": "import torch\n\ndef normalize_bbox(bboxes, pc_range):\n\n    cx = bboxes[..., 0:1]\n    cy = bboxes[..., 1:2]\n    cz = bboxes[..., 2:3]\n    w = bboxes[..., 3:4].log()\n    l = bboxes[..., 4:5].log()\n    h = bboxes[..., 5:6].log()\n\n    rot = bboxes[..., 6:7]\n    if bboxes.size(-1) > 7:\n        vx = bboxes[..., 7:8] \n        vy = bboxes[..., 8:9]\n        normalized_bboxes = torch.cat(\n            (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1\n        )\n    else:\n        normalized_bboxes = torch.cat(\n            (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1\n        )\n    return normalized_bboxes\n\n\ndef denormalize_bbox(normalized_bboxes, pc_range):\n    # rotation \n    rot_sine = normalized_bboxes[..., 6:7]\n\n    rot_cosine = normalized_bboxes[..., 7:8]\n    rot = torch.atan2(rot_sine, rot_cosine)\n\n    # center in the bev\n    cx = normalized_bboxes[..., 0:1]\n    cy = normalized_bboxes[..., 1:2]\n    cz = normalized_bboxes[..., 4:5]\n   \n    # size\n    w = normalized_bboxes[..., 2:3]\n    l = normalized_bboxes[..., 3:4]\n    h = normalized_bboxes[..., 5:6]\n\n    w = w.exp() \n    l = l.exp() \n    h = h.exp() \n    if normalized_bboxes.size(-1) > 8:\n         # velocity \n        vx = normalized_bboxes[:, 8:9]\n        vy = normalized_bboxes[:, 9:10]\n        denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)\n    else:\n        denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)\n    return denormalized_bboxes"
  },
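An illustrative round trip through the box encoding above (the `mmdet3d.core.bbox.util` import path follows the file's location; `pc_range` is accepted but unused by both functions): `normalize_bbox` stores log sizes and sin/cos of yaw, and `denormalize_bbox` inverts it.

```python
# Round-trip sketch for the DETR3D-style box encoding.
import torch
from mmdet3d.core.bbox.util import normalize_bbox, denormalize_bbox

# (cx, cy, cz, w, l, h, yaw, vx, vy)
boxes = torch.tensor([[5.0, -2.0, -1.0, 1.9, 4.5, 1.6, 0.4, 2.0, 0.1]])
encoded = normalize_bbox(boxes, pc_range=None)    # pc_range is ignored here
decoded = denormalize_bbox(encoded, pc_range=None)
# decoded order matches the input: (cx, cy, cz, w, l, h, yaw, vx, vy)
assert torch.allclose(decoded, boxes, atol=1e-5)
```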
  {
    "path": "mmdet3d/core/evaluation/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .indoor_eval import indoor_eval\nfrom .instance_seg_eval import instance_seg_eval\nfrom .kitti_utils import kitti_eval, kitti_eval_coco_style\nfrom .lyft_eval import lyft_eval\nfrom .seg_eval import seg_eval\n\n__all__ = [\n    'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval',\n    'seg_eval', 'instance_seg_eval'\n]\n"
  },
  {
    "path": "mmdet3d/core/evaluation/indoor_eval.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision (for single or multiple scales).\n\n    Args:\n        recalls (np.ndarray): Recalls with shape of (num_scales, num_dets)\n            or (num_dets, ).\n        precisions (np.ndarray): Precisions with shape of\n            (num_scales, num_dets) or (num_dets, ).\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float or np.ndarray: Calculated average precision.\n    \"\"\"\n    if recalls.ndim == 1:\n        recalls = recalls[np.newaxis, :]\n        precisions = precisions[np.newaxis, :]\n\n    assert recalls.shape == precisions.shape\n    assert recalls.ndim == 2\n\n    num_scales = recalls.shape[0]\n    ap = np.zeros(num_scales, dtype=np.float32)\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        for i in range(num_scales):\n            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]\n            ap[i] = np.sum(\n                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])\n    elif mode == '11points':\n        for i in range(num_scales):\n            for thr in np.arange(0, 1 + 1e-3, 0.1):\n                precs = precisions[i, recalls[i, :] >= thr]\n                prec = precs.max() if precs.size > 0 else 0\n                ap[i] += prec\n            ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    return ap\n\n\ndef eval_det_cls(pred, gt, iou_thr=None):\n    \"\"\"Generic functions to compute precision/recall for object detection for a\n    single class.\n\n    Args:\n        pred (dict): Predictions mapping from image id to bounding boxes\n            and scores.\n        gt (dict): Ground truths mapping from image id to bounding boxes.\n        iou_thr (list[float]): A list of iou thresholds.\n\n    Return:\n        tuple (np.ndarray, np.ndarray, float): Recalls, precisions and\n            average precision.\n    \"\"\"\n\n    # {img_id: {'bbox': box structure, 'det': matched list}}\n    class_recs = {}\n    npos = 0\n    for img_id in gt.keys():\n        cur_gt_num = len(gt[img_id])\n        if cur_gt_num != 0:\n            gt_cur = torch.zeros([cur_gt_num, 7], dtype=torch.float32)\n            for i in range(cur_gt_num):\n                gt_cur[i] = gt[img_id][i].tensor\n            bbox = gt[img_id][0].new_box(gt_cur)\n        else:\n            bbox = gt[img_id]\n        det = [[False] * len(bbox) for i in iou_thr]\n        npos += len(bbox)\n        class_recs[img_id] = {'bbox': bbox, 'det': det}\n\n    # construct dets\n    image_ids = []\n    confidence = []\n    ious = []\n    for img_id in pred.keys():\n        cur_num = len(pred[img_id])\n        if cur_num == 0:\n            continue\n        pred_cur = torch.zeros((cur_num, 7), dtype=torch.float32)\n        box_idx = 0\n        for box, score in 
pred[img_id]:\n            image_ids.append(img_id)\n            confidence.append(score)\n            pred_cur[box_idx] = box.tensor\n            box_idx += 1\n        pred_cur = box.new_box(pred_cur)\n        gt_cur = class_recs[img_id]['bbox']\n        if len(gt_cur) > 0:\n            # calculate iou in each image\n            iou_cur = pred_cur.overlaps(pred_cur, gt_cur)\n            for i in range(cur_num):\n                ious.append(iou_cur[i])\n        else:\n            for i in range(cur_num):\n                ious.append(np.zeros(1))\n\n    confidence = np.array(confidence)\n\n    # sort by confidence\n    sorted_ind = np.argsort(-confidence)\n    image_ids = [image_ids[x] for x in sorted_ind]\n    ious = [ious[x] for x in sorted_ind]\n\n    # go down dets and mark TPs and FPs\n    nd = len(image_ids)\n    tp_thr = [np.zeros(nd) for i in iou_thr]\n    fp_thr = [np.zeros(nd) for i in iou_thr]\n    for d in range(nd):\n        R = class_recs[image_ids[d]]\n        iou_max = -np.inf\n        BBGT = R['bbox']\n        cur_iou = ious[d]\n\n        if len(BBGT) > 0:\n            # compute overlaps\n            for j in range(len(BBGT)):\n                # iou = get_iou_main(get_iou_func, (bb, BBGT[j,...]))\n                iou = cur_iou[j]\n                if iou > iou_max:\n                    iou_max = iou\n                    jmax = j\n\n        for iou_idx, thresh in enumerate(iou_thr):\n            if iou_max > thresh:\n                if not R['det'][iou_idx][jmax]:\n                    tp_thr[iou_idx][d] = 1.\n                    R['det'][iou_idx][jmax] = 1\n                else:\n                    fp_thr[iou_idx][d] = 1.\n            else:\n                fp_thr[iou_idx][d] = 1.\n\n    ret = []\n    for iou_idx, thresh in enumerate(iou_thr):\n        # compute precision recall\n        fp = np.cumsum(fp_thr[iou_idx])\n        tp = np.cumsum(tp_thr[iou_idx])\n        recall = tp / float(npos)\n        # avoid divide by zero in case the first detection matches a difficult\n        # ground truth\n        precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)\n        ap = average_precision(recall, precision)\n        ret.append((recall, precision, ap))\n\n    return ret\n\n\ndef eval_map_recall(pred, gt, ovthresh=None):\n    \"\"\"Evaluate mAP and recall.\n\n    Generic functions to compute precision/recall for object detection\n        for multiple classes.\n\n    Args:\n        pred (dict): Information of detection results,\n            which maps class_id and predictions.\n        gt (dict): Information of ground truths, which maps class_id and\n            ground truths.\n        ovthresh (list[float], optional): iou threshold. 
Default: None.\n\n    Return:\n        tuple[dict]: dict results of recall, AP, and precision for all classes.\n    \"\"\"\n\n    ret_values = {}\n    for classname in gt.keys():\n        if classname in pred:\n            ret_values[classname] = eval_det_cls(pred[classname],\n                                                 gt[classname], ovthresh)\n    recall = [{} for i in ovthresh]\n    precision = [{} for i in ovthresh]\n    ap = [{} for i in ovthresh]\n\n    for label in gt.keys():\n        for iou_idx, thresh in enumerate(ovthresh):\n            if label in pred:\n                recall[iou_idx][label], precision[iou_idx][label], ap[iou_idx][\n                    label] = ret_values[label][iou_idx]\n            else:\n                recall[iou_idx][label] = np.zeros(1)\n                precision[iou_idx][label] = np.zeros(1)\n                ap[iou_idx][label] = np.zeros(1)\n\n    return recall, precision, ap\n\n\ndef indoor_eval(gt_annos,\n                dt_annos,\n                metric,\n                label2cat,\n                logger=None,\n                box_type_3d=None,\n                box_mode_3d=None):\n    \"\"\"Indoor Evaluation.\n\n    Evaluate the result of the detection.\n\n    Args:\n        gt_annos (list[dict]): Ground truth annotations.\n        dt_annos (list[dict]): Detection annotations. the dict\n            includes the following keys\n\n            - labels_3d (torch.Tensor): Labels of boxes.\n            - boxes_3d (:obj:`BaseInstance3DBoxes`):\n                3D bounding boxes in Depth coordinate.\n            - scores_3d (torch.Tensor): Scores of boxes.\n        metric (list[float]): IoU thresholds for computing average precisions.\n        label2cat (dict): Map from label to category.\n        logger (logging.Logger | str, optional): The way to print the mAP\n            summary. See `mmdet.utils.print_log()` for details. 
Default: None.\n\n    Return:\n        dict[str, float]: Dict of results.\n    \"\"\"\n    assert len(dt_annos) == len(gt_annos)\n    pred = {}  # map {class_id: pred}\n    gt = {}  # map {class_id: gt}\n    for img_id in range(len(dt_annos)):\n        # parse detected annotations\n        det_anno = dt_annos[img_id]\n        for i in range(len(det_anno['labels_3d'])):\n            label = det_anno['labels_3d'].numpy()[i]\n            bbox = det_anno['boxes_3d'].convert_to(box_mode_3d)[i]\n            score = det_anno['scores_3d'].numpy()[i]\n            if label not in pred:\n                pred[int(label)] = {}\n            if img_id not in pred[label]:\n                pred[int(label)][img_id] = []\n            if label not in gt:\n                gt[int(label)] = {}\n            if img_id not in gt[label]:\n                gt[int(label)][img_id] = []\n            pred[int(label)][img_id].append((bbox, score))\n\n        # parse gt annotations\n        gt_anno = gt_annos[img_id]\n        if gt_anno['gt_num'] != 0:\n            gt_boxes = box_type_3d(\n                gt_anno['gt_boxes_upright_depth'],\n                box_dim=gt_anno['gt_boxes_upright_depth'].shape[-1],\n                origin=(0.5, 0.5, 0.5)).convert_to(box_mode_3d)\n            labels_3d = gt_anno['class']\n        else:\n            gt_boxes = box_type_3d(np.array([], dtype=np.float32))\n            labels_3d = np.array([], dtype=np.int64)\n\n        for i in range(len(labels_3d)):\n            label = labels_3d[i]\n            bbox = gt_boxes[i]\n            if label not in gt:\n                gt[label] = {}\n            if img_id not in gt[label]:\n                gt[label][img_id] = []\n            gt[label][img_id].append(bbox)\n\n    rec, prec, ap = eval_map_recall(pred, gt, metric)\n    ret_dict = dict()\n    header = ['classes']\n    table_columns = [[label2cat[label]\n                      for label in ap[0].keys()] + ['Overall']]\n\n    for i, iou_thresh in enumerate(metric):\n        header.append(f'AP_{iou_thresh:.2f}')\n        header.append(f'AR_{iou_thresh:.2f}')\n        rec_list = []\n        for label in ap[i].keys():\n            ret_dict[f'{label2cat[label]}_AP_{iou_thresh:.2f}'] = float(\n                ap[i][label][0])\n        ret_dict[f'mAP_{iou_thresh:.2f}'] = float(\n            np.mean(list(ap[i].values())))\n\n        table_columns.append(list(map(float, list(ap[i].values()))))\n        table_columns[-1] += [ret_dict[f'mAP_{iou_thresh:.2f}']]\n        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]\n\n        for label in rec[i].keys():\n            ret_dict[f'{label2cat[label]}_rec_{iou_thresh:.2f}'] = float(\n                rec[i][label][-1])\n            rec_list.append(rec[i][label][-1])\n        ret_dict[f'mAR_{iou_thresh:.2f}'] = float(np.mean(rec_list))\n\n        table_columns.append(list(map(float, rec_list)))\n        table_columns[-1] += [ret_dict[f'mAR_{iou_thresh:.2f}']]\n        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]\n\n    table_data = [header]\n    table_rows = list(zip(*table_columns))\n    table_data += table_rows\n    table = AsciiTable(table_data)\n    table.inner_footing_row_border = True\n    print_log('\\n' + table.table, logger=logger)\n\n    return ret_dict\n"
  },
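The parsing loops in `indoor_eval` above build nested per-class, per-image dictionaries before calling `eval_map_recall`. A minimal sketch of that layout, with made-up class ids, image ids and placeholder boxes (real entries hold `BaseInstance3DBoxes` objects and tensor-derived scores):

```python
# Illustration only: plain tuples stand in for BaseInstance3DBoxes objects.

# pred[class_id][img_id] -> list of (box, confidence score) per detection
pred = {
    0: {
        0: [((0.0, 0.0, 0.0, 1.0, 1.0, 1.0), 0.9)],  # image 0: one detection
        1: [],                                        # image 1: none
    },
}

# gt[class_id][img_id] -> list of ground-truth boxes
gt = {
    0: {
        0: [(0.0, 0.0, 0.0, 1.0, 1.0, 1.0)],
        1: [(2.0, 2.0, 0.0, 1.0, 1.0, 1.0)],
    },
}

# eval_map_recall(pred, gt, ovthresh=[0.25, 0.5]) then returns one
# {class_id: recall / precision / AP} dict per IoU threshold.
```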
  {
    "path": "mmdet3d/core/evaluation/instance_seg_eval.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\nfrom .scannet_utils.evaluate_semantic_instance import scannet_eval\n\n\ndef aggregate_predictions(masks, labels, scores, valid_class_ids):\n    \"\"\"Maps predictions to ScanNet evaluator format.\n\n    Args:\n        masks (list[torch.Tensor]): Per scene predicted instance masks.\n        labels (list[torch.Tensor]): Per scene predicted instance labels.\n        scores (list[torch.Tensor]): Per scene predicted instance scores.\n        valid_class_ids (tuple[int]): Ids of valid categories.\n\n    Returns:\n        list[dict]: Per scene aggregated predictions.\n    \"\"\"\n    infos = []\n    for id, (mask, label, score) in enumerate(zip(masks, labels, scores)):\n        mask = mask.clone().numpy()\n        label = label.clone().numpy()\n        score = score.clone().numpy()\n        info = dict()\n        n_instances = mask.max() + 1\n        for i in range(n_instances):\n            # match pred_instance['filename'] from assign_instances_for_scan\n            file_name = f'{id}_{i}'\n            info[file_name] = dict()\n            info[file_name]['mask'] = (mask == i).astype(np.int)\n            info[file_name]['label_id'] = valid_class_ids[label[i]]\n            info[file_name]['conf'] = score[i]\n        infos.append(info)\n    return infos\n\n\ndef rename_gt(gt_semantic_masks, gt_instance_masks, valid_class_ids):\n    \"\"\"Maps gt instance and semantic masks to instance masks for ScanNet\n    evaluator.\n\n    Args:\n        gt_semantic_masks (list[torch.Tensor]): Per scene gt semantic masks.\n        gt_instance_masks (list[torch.Tensor]): Per scene gt instance masks.\n        valid_class_ids (tuple[int]): Ids of valid categories.\n\n    Returns:\n        list[np.array]: Per scene instance masks.\n    \"\"\"\n    renamed_instance_masks = []\n    for semantic_mask, instance_mask in zip(gt_semantic_masks,\n                                            gt_instance_masks):\n        semantic_mask = semantic_mask.clone().numpy()\n        instance_mask = instance_mask.clone().numpy()\n        unique = np.unique(instance_mask)\n        assert len(unique) < 1000\n        for i in unique:\n            semantic_instance = semantic_mask[instance_mask == i]\n            semantic_unique = np.unique(semantic_instance)\n            assert len(semantic_unique) == 1\n            if semantic_unique[0] < len(valid_class_ids):\n                instance_mask[\n                    instance_mask ==\n                    i] = 1000 * valid_class_ids[semantic_unique[0]] + i\n        renamed_instance_masks.append(instance_mask)\n    return renamed_instance_masks\n\n\ndef instance_seg_eval(gt_semantic_masks,\n                      gt_instance_masks,\n                      pred_instance_masks,\n                      pred_instance_labels,\n                      pred_instance_scores,\n                      valid_class_ids,\n                      class_labels,\n                      options=None,\n                      logger=None):\n    \"\"\"Instance Segmentation Evaluation.\n\n    Evaluate the result of the instance segmentation.\n\n    Args:\n        gt_semantic_masks (list[torch.Tensor]): Ground truth semantic masks.\n        gt_instance_masks (list[torch.Tensor]): Ground truth instance masks.\n        pred_instance_masks (list[torch.Tensor]): Predicted instance masks.\n        pred_instance_labels (list[torch.Tensor]): Predicted instance labels.\n       
 pred_instance_scores (list[torch.Tensor]): Predicted instance scores.\n        valid_class_ids (tuple[int]): Ids of valid categories.\n        class_labels (tuple[str]): Names of valid categories.\n        options (dict, optional): Additional options. Keys may contain:\n            `overlaps`, `min_region_sizes`, `distance_threshes`,\n            `distance_confs`. Default: None.\n        logger (logging.Logger | str, optional): The way to print the mAP\n            summary. See `mmdet.utils.print_log()` for details. Default: None.\n\n    Returns:\n        dict[str, float]: Dict of results.\n    \"\"\"\n    assert len(valid_class_ids) == len(class_labels)\n    id_to_label = {\n        valid_class_ids[i]: class_labels[i]\n        for i in range(len(valid_class_ids))\n    }\n    preds = aggregate_predictions(\n        masks=pred_instance_masks,\n        labels=pred_instance_labels,\n        scores=pred_instance_scores,\n        valid_class_ids=valid_class_ids)\n    gts = rename_gt(gt_semantic_masks, gt_instance_masks, valid_class_ids)\n    metrics = scannet_eval(\n        preds=preds,\n        gts=gts,\n        options=options,\n        valid_class_ids=valid_class_ids,\n        class_labels=class_labels,\n        id_to_label=id_to_label)\n    header = ['classes', 'AP_0.25', 'AP_0.50', 'AP']\n    rows = []\n    for label, data in metrics['classes'].items():\n        aps = [data['ap25%'], data['ap50%'], data['ap']]\n        rows.append([label] + [f'{ap:.4f}' for ap in aps])\n    aps = metrics['all_ap_25%'], metrics['all_ap_50%'], metrics['all_ap']\n    footer = ['Overall'] + [f'{ap:.4f}' for ap in aps]\n    table = AsciiTable([header] + rows + [footer])\n    table.inner_footing_row_border = True\n    print_log('\\n' + table.table, logger=logger)\n    return metrics\n"
  },
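`rename_gt` above folds the semantic label into the instance mask using the ScanNet-evaluator convention `1000 * class_id + instance_id`. A small, runnable sketch of that encoding with invented mask values, including how class and instance ids can be read back out:

```python
import numpy as np

# Toy semantic / instance masks over 6 points (values are illustrative).
valid_class_ids = (3, 4, 5)                     # raw dataset ids of valid classes
semantic_mask = np.array([0, 0, 1, 1, 2, 2])    # indices into valid_class_ids
instance_mask = np.array([0, 0, 1, 1, 2, 2])    # per-point instance ids

encoded = instance_mask.copy()
for i in np.unique(instance_mask):
    sem = np.unique(semantic_mask[instance_mask == i])
    assert len(sem) == 1                        # one semantic label per instance
    if sem[0] < len(valid_class_ids):
        encoded[instance_mask == i] = 1000 * valid_class_ids[sem[0]] + i

print(encoded)            # [3000 3000 4001 4001 5002 5002]
print(encoded // 1000)    # recovered class ids:    [3 3 4 4 5 5]
print(encoded % 1000)     # recovered instance ids: [0 0 1 1 2 2]
```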
  {
    "path": "mmdet3d/core/evaluation/kitti_utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .eval import kitti_eval, kitti_eval_coco_style\n\n__all__ = ['kitti_eval', 'kitti_eval_coco_style']\n"
  },
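The KITTI evaluation code in `eval.py` below indexes a fixed set of per-sample keys in `gt_annos` / `dt_annos` (per its docstrings, these normally come from `get_label_annos()` in `kitti_common.py`). A hypothetical single detection entry containing exactly the fields that `clean_data`, `calculate_iou_partly` and `_prepare_data` read; all values are invented:

```python
import numpy as np

# One hypothetical detection annotation with a single predicted 'Car'.
dt_anno = dict(
    name=np.array(['Car']),                          # class names
    truncated=np.array([0.0]),
    occluded=np.array([0]),
    alpha=np.array([-1.2]),                          # observation angle
    bbox=np.array([[300.0, 150.0, 400.0, 220.0]]),   # 2D box: x1, y1, x2, y2
    dimensions=np.array([[3.9, 1.6, 1.5]]),          # 3D size; eval.py slices
                                                     # columns [0, 2] for BEV
    location=np.array([[5.0, 1.7, 20.0]]),           # position in camera coords
    rotation_y=np.array([-1.4]),                     # yaw around camera y-axis
    score=np.array([0.95]),                          # confidence (detections only)
)
# Ground-truth entries use the same keys without meaningful 'score' values.
```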
  {
    "path": "mmdet3d/core/evaluation/kitti_utils/eval.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport gc\nimport io as sysio\n\nimport numba\nimport numpy as np\n\n\n@numba.jit\ndef get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41):\n    scores.sort()\n    scores = scores[::-1]\n    current_recall = 0\n    thresholds = []\n    for i, score in enumerate(scores):\n        l_recall = (i + 1) / num_gt\n        if i < (len(scores) - 1):\n            r_recall = (i + 2) / num_gt\n        else:\n            r_recall = l_recall\n        if (((r_recall - current_recall) < (current_recall - l_recall))\n                and (i < (len(scores) - 1))):\n            continue\n        # recall = l_recall\n        thresholds.append(score)\n        current_recall += 1 / (num_sample_pts - 1.0)\n    return thresholds\n\n\ndef clean_data(gt_anno, dt_anno, current_class, difficulty):\n    CLASS_NAMES = ['car', 'pedestrian', 'cyclist']\n    MIN_HEIGHT = [40, 25, 25]\n    MAX_OCCLUSION = [0, 1, 2]\n    MAX_TRUNCATION = [0.15, 0.3, 0.5]\n    dc_bboxes, ignored_gt, ignored_dt = [], [], []\n    current_cls_name = CLASS_NAMES[current_class].lower()\n    num_gt = len(gt_anno['name'])\n    num_dt = len(dt_anno['name'])\n    num_valid_gt = 0\n    for i in range(num_gt):\n        bbox = gt_anno['bbox'][i]\n        gt_name = gt_anno['name'][i].lower()\n        height = bbox[3] - bbox[1]\n        valid_class = -1\n        if (gt_name == current_cls_name):\n            valid_class = 1\n        elif (current_cls_name == 'Pedestrian'.lower()\n              and 'Person_sitting'.lower() == gt_name):\n            valid_class = 0\n        elif (current_cls_name == 'Car'.lower() and 'Van'.lower() == gt_name):\n            valid_class = 0\n        else:\n            valid_class = -1\n        ignore = False\n        if ((gt_anno['occluded'][i] > MAX_OCCLUSION[difficulty])\n                or (gt_anno['truncated'][i] > MAX_TRUNCATION[difficulty])\n                or (height <= MIN_HEIGHT[difficulty])):\n            ignore = True\n        if valid_class == 1 and not ignore:\n            ignored_gt.append(0)\n            num_valid_gt += 1\n        elif (valid_class == 0 or (ignore and (valid_class == 1))):\n            ignored_gt.append(1)\n        else:\n            ignored_gt.append(-1)\n    # for i in range(num_gt):\n        if gt_anno['name'][i] == 'DontCare':\n            dc_bboxes.append(gt_anno['bbox'][i])\n    for i in range(num_dt):\n        if (dt_anno['name'][i].lower() == current_cls_name):\n            valid_class = 1\n        else:\n            valid_class = -1\n        height = abs(dt_anno['bbox'][i, 3] - dt_anno['bbox'][i, 1])\n        if height < MIN_HEIGHT[difficulty]:\n            ignored_dt.append(1)\n        elif valid_class == 1:\n            ignored_dt.append(0)\n        else:\n            ignored_dt.append(-1)\n\n    return num_valid_gt, ignored_gt, ignored_dt, dc_bboxes\n\n\n@numba.jit(nopython=True)\ndef image_box_overlap(boxes, query_boxes, criterion=-1):\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    overlaps = np.zeros((N, K), dtype=boxes.dtype)\n    for k in range(K):\n        qbox_area = ((query_boxes[k, 2] - query_boxes[k, 0]) *\n                     (query_boxes[k, 3] - query_boxes[k, 1]))\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]))\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]))\n     
           if ih > 0:\n                    if criterion == -1:\n                        ua = ((boxes[n, 2] - boxes[n, 0]) *\n                              (boxes[n, 3] - boxes[n, 1]) + qbox_area -\n                              iw * ih)\n                    elif criterion == 0:\n                        ua = ((boxes[n, 2] - boxes[n, 0]) *\n                              (boxes[n, 3] - boxes[n, 1]))\n                    elif criterion == 1:\n                        ua = qbox_area\n                    else:\n                        ua = 1.0\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps\n\n\ndef bev_box_overlap(boxes, qboxes, criterion=-1):\n    from .rotate_iou import rotate_iou_gpu_eval\n    riou = rotate_iou_gpu_eval(boxes, qboxes, criterion)\n    return riou\n\n\n@numba.jit(nopython=True, parallel=True)\ndef d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1):\n    # ONLY support overlap in CAMERA, not lidar.\n    # TODO: change to use prange for parallel mode, should check the difference\n    N, K = boxes.shape[0], qboxes.shape[0]\n    for i in numba.prange(N):\n        for j in numba.prange(K):\n            if rinc[i, j] > 0:\n                # iw = (min(boxes[i, 1] + boxes[i, 4], qboxes[j, 1] +\n                #         qboxes[j, 4]) - max(boxes[i, 1], qboxes[j, 1]))\n                iw = (\n                    min(boxes[i, 1], qboxes[j, 1]) -\n                    max(boxes[i, 1] - boxes[i, 4],\n                        qboxes[j, 1] - qboxes[j, 4]))\n\n                if iw > 0:\n                    area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5]\n                    area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5]\n                    inc = iw * rinc[i, j]\n                    if criterion == -1:\n                        ua = (area1 + area2 - inc)\n                    elif criterion == 0:\n                        ua = area1\n                    elif criterion == 1:\n                        ua = area2\n                    else:\n                        ua = inc\n                    rinc[i, j] = inc / ua\n                else:\n                    rinc[i, j] = 0.0\n\n\ndef d3_box_overlap(boxes, qboxes, criterion=-1):\n    from .rotate_iou import rotate_iou_gpu_eval\n    rinc = rotate_iou_gpu_eval(boxes[:, [0, 2, 3, 5, 6]],\n                               qboxes[:, [0, 2, 3, 5, 6]], 2)\n    d3_box_overlap_kernel(boxes, qboxes, rinc, criterion)\n    return rinc\n\n\n@numba.jit(nopython=True)\ndef compute_statistics_jit(overlaps,\n                           gt_datas,\n                           dt_datas,\n                           ignored_gt,\n                           ignored_det,\n                           dc_bboxes,\n                           metric,\n                           min_overlap,\n                           thresh=0,\n                           compute_fp=False,\n                           compute_aos=False):\n\n    det_size = dt_datas.shape[0]\n    gt_size = gt_datas.shape[0]\n    dt_scores = dt_datas[:, -1]\n    dt_alphas = dt_datas[:, 4]\n    gt_alphas = gt_datas[:, 4]\n    dt_bboxes = dt_datas[:, :4]\n    # gt_bboxes = gt_datas[:, :4]\n\n    assigned_detection = [False] * det_size\n    ignored_threshold = [False] * det_size\n    if compute_fp:\n        for i in range(det_size):\n            if (dt_scores[i] < thresh):\n                ignored_threshold[i] = True\n    NO_DETECTION = -10000000\n    tp, fp, fn, similarity = 0, 0, 0, 0\n    # thresholds = [0.0]\n    # delta = [0.0]\n    thresholds = np.zeros((gt_size, ))\n    thresh_idx = 
0\n    delta = np.zeros((gt_size, ))\n    delta_idx = 0\n    for i in range(gt_size):\n        if ignored_gt[i] == -1:\n            continue\n        det_idx = -1\n        valid_detection = NO_DETECTION\n        max_overlap = 0\n        assigned_ignored_det = False\n\n        for j in range(det_size):\n            if (ignored_det[j] == -1):\n                continue\n            if (assigned_detection[j]):\n                continue\n            if (ignored_threshold[j]):\n                continue\n            overlap = overlaps[j, i]\n            dt_score = dt_scores[j]\n            if (not compute_fp and (overlap > min_overlap)\n                    and dt_score > valid_detection):\n                det_idx = j\n                valid_detection = dt_score\n            elif (compute_fp and (overlap > min_overlap)\n                  and (overlap > max_overlap or assigned_ignored_det)\n                  and ignored_det[j] == 0):\n                max_overlap = overlap\n                det_idx = j\n                valid_detection = 1\n                assigned_ignored_det = False\n            elif (compute_fp and (overlap > min_overlap)\n                  and (valid_detection == NO_DETECTION)\n                  and ignored_det[j] == 1):\n                det_idx = j\n                valid_detection = 1\n                assigned_ignored_det = True\n\n        if (valid_detection == NO_DETECTION) and ignored_gt[i] == 0:\n            fn += 1\n        elif ((valid_detection != NO_DETECTION)\n              and (ignored_gt[i] == 1 or ignored_det[det_idx] == 1)):\n            assigned_detection[det_idx] = True\n        elif valid_detection != NO_DETECTION:\n            tp += 1\n            # thresholds.append(dt_scores[det_idx])\n            thresholds[thresh_idx] = dt_scores[det_idx]\n            thresh_idx += 1\n            if compute_aos:\n                # delta.append(gt_alphas[i] - dt_alphas[det_idx])\n                delta[delta_idx] = gt_alphas[i] - dt_alphas[det_idx]\n                delta_idx += 1\n\n            assigned_detection[det_idx] = True\n    if compute_fp:\n        for i in range(det_size):\n            if (not (assigned_detection[i] or ignored_det[i] == -1\n                     or ignored_det[i] == 1 or ignored_threshold[i])):\n                fp += 1\n        nstuff = 0\n        if metric == 0:\n            overlaps_dt_dc = image_box_overlap(dt_bboxes, dc_bboxes, 0)\n            for i in range(dc_bboxes.shape[0]):\n                for j in range(det_size):\n                    if (assigned_detection[j]):\n                        continue\n                    if (ignored_det[j] == -1 or ignored_det[j] == 1):\n                        continue\n                    if (ignored_threshold[j]):\n                        continue\n                    if overlaps_dt_dc[j, i] > min_overlap:\n                        assigned_detection[j] = True\n                        nstuff += 1\n        fp -= nstuff\n        if compute_aos:\n            tmp = np.zeros((fp + delta_idx, ))\n            # tmp = [0] * fp\n            for i in range(delta_idx):\n                tmp[i + fp] = (1.0 + np.cos(delta[i])) / 2.0\n                # tmp.append((1.0 + np.cos(delta[i])) / 2.0)\n            # assert len(tmp) == fp + tp\n            # assert len(delta) == tp\n            if tp > 0 or fp > 0:\n                similarity = np.sum(tmp)\n            else:\n                similarity = -1\n    return tp, fp, fn, similarity, thresholds[:thresh_idx]\n\n\ndef get_split_parts(num, num_part):\n    same_part = num // 
num_part\n    remain_num = num % num_part\n    if remain_num == 0:\n        return [same_part] * num_part\n    else:\n        return [same_part] * num_part + [remain_num]\n\n\n@numba.jit(nopython=True)\ndef fused_compute_statistics(overlaps,\n                             pr,\n                             gt_nums,\n                             dt_nums,\n                             dc_nums,\n                             gt_datas,\n                             dt_datas,\n                             dontcares,\n                             ignored_gts,\n                             ignored_dets,\n                             metric,\n                             min_overlap,\n                             thresholds,\n                             compute_aos=False):\n    gt_num = 0\n    dt_num = 0\n    dc_num = 0\n    for i in range(gt_nums.shape[0]):\n        for t, thresh in enumerate(thresholds):\n            overlap = overlaps[dt_num:dt_num + dt_nums[i],\n                               gt_num:gt_num + gt_nums[i]]\n\n            gt_data = gt_datas[gt_num:gt_num + gt_nums[i]]\n            dt_data = dt_datas[dt_num:dt_num + dt_nums[i]]\n            ignored_gt = ignored_gts[gt_num:gt_num + gt_nums[i]]\n            ignored_det = ignored_dets[dt_num:dt_num + dt_nums[i]]\n            dontcare = dontcares[dc_num:dc_num + dc_nums[i]]\n            tp, fp, fn, similarity, _ = compute_statistics_jit(\n                overlap,\n                gt_data,\n                dt_data,\n                ignored_gt,\n                ignored_det,\n                dontcare,\n                metric,\n                min_overlap=min_overlap,\n                thresh=thresh,\n                compute_fp=True,\n                compute_aos=compute_aos)\n            pr[t, 0] += tp\n            pr[t, 1] += fp\n            pr[t, 2] += fn\n            if similarity != -1:\n                pr[t, 3] += similarity\n        gt_num += gt_nums[i]\n        dt_num += dt_nums[i]\n        dc_num += dc_nums[i]\n\n\ndef calculate_iou_partly(gt_annos, dt_annos, metric, num_parts=50):\n    \"\"\"Fast iou algorithm. this function can be used independently to do result\n    analysis. Must be used in CAMERA coordinate system.\n\n    Args:\n        gt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        dt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        metric (int): Eval type. 
0: bbox, 1: bev, 2: 3d.\n        num_parts (int): A parameter for fast calculate algorithm.\n    \"\"\"\n    assert len(gt_annos) == len(dt_annos)\n    total_dt_num = np.stack([len(a['name']) for a in dt_annos], 0)\n    total_gt_num = np.stack([len(a['name']) for a in gt_annos], 0)\n    num_examples = len(gt_annos)\n    split_parts = get_split_parts(num_examples, num_parts)\n    parted_overlaps = []\n    example_idx = 0\n\n    for num_part in split_parts:\n        gt_annos_part = gt_annos[example_idx:example_idx + num_part]\n        dt_annos_part = dt_annos[example_idx:example_idx + num_part]\n        if metric == 0:\n            gt_boxes = np.concatenate([a['bbox'] for a in gt_annos_part], 0)\n            dt_boxes = np.concatenate([a['bbox'] for a in dt_annos_part], 0)\n            overlap_part = image_box_overlap(gt_boxes, dt_boxes)\n        elif metric == 1:\n            loc = np.concatenate(\n                [a['location'][:, [0, 2]] for a in gt_annos_part], 0)\n            dims = np.concatenate(\n                [a['dimensions'][:, [0, 2]] for a in gt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in gt_annos_part], 0)\n            gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            loc = np.concatenate(\n                [a['location'][:, [0, 2]] for a in dt_annos_part], 0)\n            dims = np.concatenate(\n                [a['dimensions'][:, [0, 2]] for a in dt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in dt_annos_part], 0)\n            dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            overlap_part = bev_box_overlap(gt_boxes,\n                                           dt_boxes).astype(np.float64)\n        elif metric == 2:\n            loc = np.concatenate([a['location'] for a in gt_annos_part], 0)\n            dims = np.concatenate([a['dimensions'] for a in gt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in gt_annos_part], 0)\n            gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            loc = np.concatenate([a['location'] for a in dt_annos_part], 0)\n            dims = np.concatenate([a['dimensions'] for a in dt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in dt_annos_part], 0)\n            dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            overlap_part = d3_box_overlap(gt_boxes,\n                                          dt_boxes).astype(np.float64)\n        else:\n            raise ValueError('unknown metric')\n        parted_overlaps.append(overlap_part)\n        example_idx += num_part\n    overlaps = []\n    example_idx = 0\n    for j, num_part in enumerate(split_parts):\n        gt_annos_part = gt_annos[example_idx:example_idx + num_part]\n        dt_annos_part = dt_annos[example_idx:example_idx + num_part]\n        gt_num_idx, dt_num_idx = 0, 0\n        for i in range(num_part):\n            gt_box_num = total_gt_num[example_idx + i]\n            dt_box_num = total_dt_num[example_idx + i]\n            overlaps.append(\n                parted_overlaps[j][gt_num_idx:gt_num_idx + gt_box_num,\n                                   dt_num_idx:dt_num_idx + dt_box_num])\n            gt_num_idx += gt_box_num\n            dt_num_idx += dt_box_num\n        example_idx += num_part\n\n    
return overlaps, parted_overlaps, total_gt_num, total_dt_num\n\n\ndef _prepare_data(gt_annos, dt_annos, current_class, difficulty):\n    gt_datas_list = []\n    dt_datas_list = []\n    total_dc_num = []\n    ignored_gts, ignored_dets, dontcares = [], [], []\n    total_num_valid_gt = 0\n    for i in range(len(gt_annos)):\n        rets = clean_data(gt_annos[i], dt_annos[i], current_class, difficulty)\n        num_valid_gt, ignored_gt, ignored_det, dc_bboxes = rets\n        ignored_gts.append(np.array(ignored_gt, dtype=np.int64))\n        ignored_dets.append(np.array(ignored_det, dtype=np.int64))\n        if len(dc_bboxes) == 0:\n            dc_bboxes = np.zeros((0, 4)).astype(np.float64)\n        else:\n            dc_bboxes = np.stack(dc_bboxes, 0).astype(np.float64)\n        total_dc_num.append(dc_bboxes.shape[0])\n        dontcares.append(dc_bboxes)\n        total_num_valid_gt += num_valid_gt\n        gt_datas = np.concatenate(\n            [gt_annos[i]['bbox'], gt_annos[i]['alpha'][..., np.newaxis]], 1)\n        dt_datas = np.concatenate([\n            dt_annos[i]['bbox'], dt_annos[i]['alpha'][..., np.newaxis],\n            dt_annos[i]['score'][..., np.newaxis]\n        ], 1)\n        gt_datas_list.append(gt_datas)\n        dt_datas_list.append(dt_datas)\n    total_dc_num = np.stack(total_dc_num, axis=0)\n    return (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, dontcares,\n            total_dc_num, total_num_valid_gt)\n\n\ndef eval_class(gt_annos,\n               dt_annos,\n               current_classes,\n               difficultys,\n               metric,\n               min_overlaps,\n               compute_aos=False,\n               num_parts=200):\n    \"\"\"Kitti eval. support 2d/bev/3d/aos eval. support 0.5:0.05:0.95 coco AP.\n\n    Args:\n        gt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        dt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        current_classes (list[int]): 0: car, 1: pedestrian, 2: cyclist.\n        difficultys (list[int]): Eval difficulty, 0: easy, 1: normal, 2: hard\n        metric (int): Eval type. 0: bbox, 1: bev, 2: 3d\n        min_overlaps (float): Min overlap. 
format:\n            [num_overlap, metric, class].\n        num_parts (int): A parameter for fast calculate algorithm\n\n    Returns:\n        dict[str, np.ndarray]: recall, precision and aos\n    \"\"\"\n    assert len(gt_annos) == len(dt_annos)\n    num_examples = len(gt_annos)\n    if num_examples < num_parts:\n        num_parts = num_examples\n    split_parts = get_split_parts(num_examples, num_parts)\n\n    rets = calculate_iou_partly(dt_annos, gt_annos, metric, num_parts)\n    overlaps, parted_overlaps, total_dt_num, total_gt_num = rets\n    N_SAMPLE_PTS = 41\n    num_minoverlap = len(min_overlaps)\n    num_class = len(current_classes)\n    num_difficulty = len(difficultys)\n    precision = np.zeros(\n        [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS])\n    recall = np.zeros(\n        [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS])\n    aos = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS])\n    for m, current_class in enumerate(current_classes):\n        for idx_l, difficulty in enumerate(difficultys):\n            rets = _prepare_data(gt_annos, dt_annos, current_class, difficulty)\n            (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets,\n             dontcares, total_dc_num, total_num_valid_gt) = rets\n            for k, min_overlap in enumerate(min_overlaps[:, metric, m]):\n                thresholdss = []\n                for i in range(len(gt_annos)):\n                    rets = compute_statistics_jit(\n                        overlaps[i],\n                        gt_datas_list[i],\n                        dt_datas_list[i],\n                        ignored_gts[i],\n                        ignored_dets[i],\n                        dontcares[i],\n                        metric,\n                        min_overlap=min_overlap,\n                        thresh=0.0,\n                        compute_fp=False)\n                    tp, fp, fn, similarity, thresholds = rets\n                    thresholdss += thresholds.tolist()\n                thresholdss = np.array(thresholdss)\n                thresholds = get_thresholds(thresholdss, total_num_valid_gt)\n                thresholds = np.array(thresholds)\n                pr = np.zeros([len(thresholds), 4])\n                idx = 0\n                for j, num_part in enumerate(split_parts):\n                    gt_datas_part = np.concatenate(\n                        gt_datas_list[idx:idx + num_part], 0)\n                    dt_datas_part = np.concatenate(\n                        dt_datas_list[idx:idx + num_part], 0)\n                    dc_datas_part = np.concatenate(\n                        dontcares[idx:idx + num_part], 0)\n                    ignored_dets_part = np.concatenate(\n                        ignored_dets[idx:idx + num_part], 0)\n                    ignored_gts_part = np.concatenate(\n                        ignored_gts[idx:idx + num_part], 0)\n                    fused_compute_statistics(\n                        parted_overlaps[j],\n                        pr,\n                        total_gt_num[idx:idx + num_part],\n                        total_dt_num[idx:idx + num_part],\n                        total_dc_num[idx:idx + num_part],\n                        gt_datas_part,\n                        dt_datas_part,\n                        dc_datas_part,\n                        ignored_gts_part,\n                        ignored_dets_part,\n                        metric,\n                        min_overlap=min_overlap,\n                        
thresholds=thresholds,\n                        compute_aos=compute_aos)\n                    idx += num_part\n                for i in range(len(thresholds)):\n                    recall[m, idx_l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 2])\n                    precision[m, idx_l, k, i] = pr[i, 0] / (\n                        pr[i, 0] + pr[i, 1])\n                    if compute_aos:\n                        aos[m, idx_l, k, i] = pr[i, 3] / (pr[i, 0] + pr[i, 1])\n                for i in range(len(thresholds)):\n                    precision[m, idx_l, k, i] = np.max(\n                        precision[m, idx_l, k, i:], axis=-1)\n                    recall[m, idx_l, k, i] = np.max(\n                        recall[m, idx_l, k, i:], axis=-1)\n                    if compute_aos:\n                        aos[m, idx_l, k, i] = np.max(\n                            aos[m, idx_l, k, i:], axis=-1)\n    ret_dict = {\n        'recall': recall,\n        'precision': precision,\n        'orientation': aos,\n    }\n\n    # clean temp variables\n    del overlaps\n    del parted_overlaps\n\n    gc.collect()\n    return ret_dict\n\n\ndef get_mAP11(prec):\n    sums = 0\n    for i in range(0, prec.shape[-1], 4):\n        sums = sums + prec[..., i]\n    return sums / 11 * 100\n\n\ndef get_mAP40(prec):\n    sums = 0\n    for i in range(1, prec.shape[-1]):\n        sums = sums + prec[..., i]\n    return sums / 40 * 100\n\n\ndef print_str(value, *arg, sstream=None):\n    if sstream is None:\n        sstream = sysio.StringIO()\n    sstream.truncate(0)\n    sstream.seek(0)\n    print(value, *arg, file=sstream)\n    return sstream.getvalue()\n\n\ndef do_eval(gt_annos,\n            dt_annos,\n            current_classes,\n            min_overlaps,\n            eval_types=['bbox', 'bev', '3d']):\n    # min_overlaps: [num_minoverlap, metric, num_class]\n    difficultys = [0, 1, 2]\n    mAP11_bbox = None\n    mAP11_aos = None\n    mAP40_bbox = None\n    mAP40_aos = None\n    if 'bbox' in eval_types:\n        ret = eval_class(\n            gt_annos,\n            dt_annos,\n            current_classes,\n            difficultys,\n            0,\n            min_overlaps,\n            compute_aos=('aos' in eval_types))\n        # ret: [num_class, num_diff, num_minoverlap, num_sample_points]\n        mAP11_bbox = get_mAP11(ret['precision'])\n        mAP40_bbox = get_mAP40(ret['precision'])\n        if 'aos' in eval_types:\n            mAP11_aos = get_mAP11(ret['orientation'])\n            mAP40_aos = get_mAP40(ret['orientation'])\n\n    mAP11_bev = None\n    mAP40_bev = None\n    if 'bev' in eval_types:\n        ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 1,\n                         min_overlaps)\n        mAP11_bev = get_mAP11(ret['precision'])\n        mAP40_bev = get_mAP40(ret['precision'])\n\n    mAP11_3d = None\n    mAP40_3d = None\n    if '3d' in eval_types:\n        ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 2,\n                         min_overlaps)\n        mAP11_3d = get_mAP11(ret['precision'])\n        mAP40_3d = get_mAP40(ret['precision'])\n    return (mAP11_bbox, mAP11_bev, mAP11_3d, mAP11_aos, mAP40_bbox, mAP40_bev,\n            mAP40_3d, mAP40_aos)\n\n\ndef do_coco_style_eval(gt_annos, dt_annos, current_classes, overlap_ranges,\n                       compute_aos):\n    # overlap_ranges: [range, metric, num_class]\n    min_overlaps = np.zeros([10, *overlap_ranges.shape[1:]])\n    for i in range(overlap_ranges.shape[1]):\n        for j in 
range(overlap_ranges.shape[2]):\n            min_overlaps[:, i, j] = np.linspace(*overlap_ranges[:, i, j])\n    mAP_bbox, mAP_bev, mAP_3d, mAP_aos, _, _, \\\n        _, _ = do_eval(gt_annos, dt_annos,\n                       current_classes, min_overlaps,\n                       compute_aos)\n    # ret: [num_class, num_diff, num_minoverlap]\n    mAP_bbox = mAP_bbox.mean(-1)\n    mAP_bev = mAP_bev.mean(-1)\n    mAP_3d = mAP_3d.mean(-1)\n    if mAP_aos is not None:\n        mAP_aos = mAP_aos.mean(-1)\n    return mAP_bbox, mAP_bev, mAP_3d, mAP_aos\n\n\ndef kitti_eval(gt_annos,\n               dt_annos,\n               current_classes,\n               eval_types=['bbox', 'bev', '3d']):\n    \"\"\"KITTI evaluation.\n\n    Args:\n        gt_annos (list[dict]): Contain gt information of each sample.\n        dt_annos (list[dict]): Contain detected information of each sample.\n        current_classes (list[str]): Classes to evaluation.\n        eval_types (list[str], optional): Types to eval.\n            Defaults to ['bbox', 'bev', '3d'].\n\n    Returns:\n        tuple: String and dict of evaluation results.\n    \"\"\"\n    assert len(eval_types) > 0, 'must contain at least one evaluation type'\n    if 'aos' in eval_types:\n        assert 'bbox' in eval_types, 'must evaluate bbox when evaluating aos'\n    overlap_0_7 = np.array([[0.7, 0.5, 0.5, 0.7,\n                             0.5], [0.7, 0.5, 0.5, 0.7, 0.5],\n                            [0.7, 0.5, 0.5, 0.7, 0.5]])\n    overlap_0_5 = np.array([[0.7, 0.5, 0.5, 0.7, 0.5],\n                            [0.5, 0.25, 0.25, 0.5, 0.25],\n                            [0.5, 0.25, 0.25, 0.5, 0.25]])\n    min_overlaps = np.stack([overlap_0_7, overlap_0_5], axis=0)  # [2, 3, 5]\n    class_to_name = {\n        0: 'Car',\n        1: 'Pedestrian',\n        2: 'Cyclist',\n        3: 'Van',\n        4: 'Person_sitting',\n    }\n    name_to_class = {v: n for n, v in class_to_name.items()}\n    if not isinstance(current_classes, (list, tuple)):\n        current_classes = [current_classes]\n    current_classes_int = []\n    for curcls in current_classes:\n        if isinstance(curcls, str):\n            current_classes_int.append(name_to_class[curcls])\n        else:\n            current_classes_int.append(curcls)\n    current_classes = current_classes_int\n    min_overlaps = min_overlaps[:, :, current_classes]\n    result = ''\n    # check whether alpha is valid\n    compute_aos = False\n    pred_alpha = False\n    valid_alpha_gt = False\n    for anno in dt_annos:\n        mask = (anno['alpha'] != -10)\n        if anno['alpha'][mask].shape[0] != 0:\n            pred_alpha = True\n            break\n    for anno in gt_annos:\n        if anno['alpha'][0] != -10:\n            valid_alpha_gt = True\n            break\n    compute_aos = (pred_alpha and valid_alpha_gt)\n    if compute_aos:\n        eval_types.append('aos')\n\n    mAP11_bbox, mAP11_bev, mAP11_3d, mAP11_aos, mAP40_bbox, mAP40_bev, \\\n        mAP40_3d, mAP40_aos = do_eval(gt_annos, dt_annos,\n                                      current_classes, min_overlaps,\n                                      eval_types)\n\n    ret_dict = {}\n    difficulty = ['easy', 'moderate', 'hard']\n\n    # calculate AP11\n    result += '\\n----------- AP11 Results ------------\\n\\n'\n    for j, curcls in enumerate(current_classes):\n        # mAP threshold array: [num_minoverlap, metric, class]\n        # mAP result: [num_class, num_diff, num_minoverlap]\n        curcls_name = class_to_name[curcls]\n        for i in 
range(min_overlaps.shape[0]):\n            # prepare results for print\n            result += ('{} AP11@{:.2f}, {:.2f}, {:.2f}:\\n'.format(\n                curcls_name, *min_overlaps[i, :, j]))\n            if mAP11_bbox is not None:\n                result += 'bbox AP11:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP11_bbox[j, :, i])\n            if mAP11_bev is not None:\n                result += 'bev  AP11:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP11_bev[j, :, i])\n            if mAP11_3d is not None:\n                result += '3d   AP11:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP11_3d[j, :, i])\n            if compute_aos:\n                result += 'aos  AP11:{:.2f}, {:.2f}, {:.2f}\\n'.format(\n                    *mAP11_aos[j, :, i])\n\n            # prepare results for logger\n            for idx in range(3):\n                if i == 0:\n                    postfix = f'{difficulty[idx]}_strict'\n                else:\n                    postfix = f'{difficulty[idx]}_loose'\n                prefix = f'KITTI/{curcls_name}'\n                if mAP11_3d is not None:\n                    ret_dict[f'{prefix}_3D_AP11_{postfix}'] =\\\n                        mAP11_3d[j, idx, i]\n                if mAP11_bev is not None:\n                    ret_dict[f'{prefix}_BEV_AP11_{postfix}'] =\\\n                        mAP11_bev[j, idx, i]\n                if mAP11_bbox is not None:\n                    ret_dict[f'{prefix}_2D_AP11_{postfix}'] =\\\n                        mAP11_bbox[j, idx, i]\n\n    # calculate mAP11 over all classes if there are multiple classes\n    if len(current_classes) > 1:\n        # prepare results for print\n        result += ('\\nOverall AP11@{}, {}, {}:\\n'.format(*difficulty))\n        if mAP11_bbox is not None:\n            mAP11_bbox = mAP11_bbox.mean(axis=0)\n            result += 'bbox AP11:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                *mAP11_bbox[:, 0])\n        if mAP11_bev is not None:\n            mAP11_bev = mAP11_bev.mean(axis=0)\n            result += 'bev  AP11:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                *mAP11_bev[:, 0])\n        if mAP11_3d is not None:\n            mAP11_3d = mAP11_3d.mean(axis=0)\n            result += '3d   AP11:{:.4f}, {:.4f}, {:.4f}\\n'.format(*mAP11_3d[:,\n                                                                            0])\n        if compute_aos:\n            mAP11_aos = mAP11_aos.mean(axis=0)\n            result += 'aos  AP11:{:.2f}, {:.2f}, {:.2f}\\n'.format(\n                *mAP11_aos[:, 0])\n\n        # prepare results for logger\n        for idx in range(3):\n            postfix = f'{difficulty[idx]}'\n            if mAP11_3d is not None:\n                ret_dict[f'KITTI/Overall_3D_AP11_{postfix}'] = mAP11_3d[idx, 0]\n            if mAP11_bev is not None:\n                ret_dict[f'KITTI/Overall_BEV_AP11_{postfix}'] =\\\n                    mAP11_bev[idx, 0]\n            if mAP11_bbox is not None:\n                ret_dict[f'KITTI/Overall_2D_AP11_{postfix}'] =\\\n                    mAP11_bbox[idx, 0]\n\n    # Calculate AP40\n    result += '\\n----------- AP40 Results ------------\\n\\n'\n    for j, curcls in enumerate(current_classes):\n        # mAP threshold array: [num_minoverlap, metric, class]\n        # mAP result: [num_class, num_diff, num_minoverlap]\n        curcls_name = class_to_name[curcls]\n        for i in range(min_overlaps.shape[0]):\n            # prepare results for print\n            result += ('{} AP40@{:.2f}, {:.2f}, 
{:.2f}:\\n'.format(\n                curcls_name, *min_overlaps[i, :, j]))\n            if mAP40_bbox is not None:\n                result += 'bbox AP40:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP40_bbox[j, :, i])\n            if mAP40_bev is not None:\n                result += 'bev  AP40:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP40_bev[j, :, i])\n            if mAP40_3d is not None:\n                result += '3d   AP40:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP40_3d[j, :, i])\n            if compute_aos:\n                result += 'aos  AP40:{:.2f}, {:.2f}, {:.2f}\\n'.format(\n                    *mAP40_aos[j, :, i])\n\n            # prepare results for logger\n            for idx in range(3):\n                if i == 0:\n                    postfix = f'{difficulty[idx]}_strict'\n                else:\n                    postfix = f'{difficulty[idx]}_loose'\n                prefix = f'KITTI/{curcls_name}'\n                if mAP40_3d is not None:\n                    ret_dict[f'{prefix}_3D_AP40_{postfix}'] =\\\n                        mAP40_3d[j, idx, i]\n                if mAP40_bev is not None:\n                    ret_dict[f'{prefix}_BEV_AP40_{postfix}'] =\\\n                        mAP40_bev[j, idx, i]\n                if mAP40_bbox is not None:\n                    ret_dict[f'{prefix}_2D_AP40_{postfix}'] =\\\n                        mAP40_bbox[j, idx, i]\n\n    # calculate mAP40 over all classes if there are multiple classes\n    if len(current_classes) > 1:\n        # prepare results for print\n        result += ('\\nOverall AP40@{}, {}, {}:\\n'.format(*difficulty))\n        if mAP40_bbox is not None:\n            mAP40_bbox = mAP40_bbox.mean(axis=0)\n            result += 'bbox AP40:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                *mAP40_bbox[:, 0])\n        if mAP40_bev is not None:\n            mAP40_bev = mAP40_bev.mean(axis=0)\n            result += 'bev  AP40:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                *mAP40_bev[:, 0])\n        if mAP40_3d is not None:\n            mAP40_3d = mAP40_3d.mean(axis=0)\n            result += '3d   AP40:{:.4f}, {:.4f}, {:.4f}\\n'.format(*mAP40_3d[:,\n                                                                            0])\n        if compute_aos:\n            mAP40_aos = mAP40_aos.mean(axis=0)\n            result += 'aos  AP40:{:.2f}, {:.2f}, {:.2f}\\n'.format(\n                *mAP40_aos[:, 0])\n\n        # prepare results for logger\n        for idx in range(3):\n            postfix = f'{difficulty[idx]}'\n            if mAP40_3d is not None:\n                ret_dict[f'KITTI/Overall_3D_AP40_{postfix}'] = mAP40_3d[idx, 0]\n            if mAP40_bev is not None:\n                ret_dict[f'KITTI/Overall_BEV_AP40_{postfix}'] =\\\n                    mAP40_bev[idx, 0]\n            if mAP40_bbox is not None:\n                ret_dict[f'KITTI/Overall_2D_AP40_{postfix}'] =\\\n                    mAP40_bbox[idx, 0]\n\n    return result, ret_dict\n\n\ndef kitti_eval_coco_style(gt_annos, dt_annos, current_classes):\n    \"\"\"coco style evaluation of kitti.\n\n    Args:\n        gt_annos (list[dict]): Contain gt information of each sample.\n        dt_annos (list[dict]): Contain detected information of each sample.\n        current_classes (list[str]): Classes to evaluation.\n\n    Returns:\n        string: Evaluation results.\n    \"\"\"\n    class_to_name = {\n        0: 'Car',\n        1: 'Pedestrian',\n        2: 'Cyclist',\n        3: 'Van',\n        4: 'Person_sitting',\n    
}\n    class_to_range = {\n        0: [0.5, 0.95, 10],\n        1: [0.25, 0.7, 10],\n        2: [0.25, 0.7, 10],\n        3: [0.5, 0.95, 10],\n        4: [0.25, 0.7, 10],\n    }\n    name_to_class = {v: n for n, v in class_to_name.items()}\n    if not isinstance(current_classes, (list, tuple)):\n        current_classes = [current_classes]\n    current_classes_int = []\n    for curcls in current_classes:\n        if isinstance(curcls, str):\n            current_classes_int.append(name_to_class[curcls])\n        else:\n            current_classes_int.append(curcls)\n    current_classes = current_classes_int\n    overlap_ranges = np.zeros([3, 3, len(current_classes)])\n    for i, curcls in enumerate(current_classes):\n        overlap_ranges[:, :, i] = np.array(class_to_range[curcls])[:,\n                                                                   np.newaxis]\n    result = ''\n    # check whether alpha is valid\n    compute_aos = False\n    for anno in dt_annos:\n        if anno['alpha'].shape[0] != 0:\n            if anno['alpha'][0] != -10:\n                compute_aos = True\n            break\n    mAPbbox, mAPbev, mAP3d, mAPaos = do_coco_style_eval(\n        gt_annos, dt_annos, current_classes, overlap_ranges, compute_aos)\n    for j, curcls in enumerate(current_classes):\n        # mAP threshold array: [num_minoverlap, metric, class]\n        # mAP result: [num_class, num_diff, num_minoverlap]\n        o_range = np.array(class_to_range[curcls])[[0, 2, 1]]\n        o_range[1] = (o_range[2] - o_range[0]) / (o_range[1] - 1)\n        result += print_str((f'{class_to_name[curcls]} '\n                             'coco AP@{:.2f}:{:.2f}:{:.2f}:'.format(*o_range)))\n        result += print_str((f'bbox AP:{mAPbbox[j, 0]:.2f}, '\n                             f'{mAPbbox[j, 1]:.2f}, '\n                             f'{mAPbbox[j, 2]:.2f}'))\n        result += print_str((f'bev  AP:{mAPbev[j, 0]:.2f}, '\n                             f'{mAPbev[j, 1]:.2f}, '\n                             f'{mAPbev[j, 2]:.2f}'))\n        result += print_str((f'3d   AP:{mAP3d[j, 0]:.2f}, '\n                             f'{mAP3d[j, 1]:.2f}, '\n                             f'{mAP3d[j, 2]:.2f}'))\n        if compute_aos:\n            result += print_str((f'aos  AP:{mAPaos[j, 0]:.2f}, '\n                                 f'{mAPaos[j, 1]:.2f}, '\n                                 f'{mAPaos[j, 2]:.2f}'))\n    return result\n"
  },
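`get_mAP11` and `get_mAP40` above average the 41-point precision curve produced by `eval_class` at 11 and 40 recall positions respectively. A tiny numeric sketch, with a made-up precision vector, of which sample indices each variant uses:

```python
import numpy as np

# Fake precision values at the 41 recall sample points (index 0 .. 40).
prec = np.linspace(1.0, 0.2, 41)

# AP11: every 4th point starting at index 0 -> 11 samples (recall 0.0 .. 1.0).
ap11 = sum(prec[i] for i in range(0, prec.shape[-1], 4)) / 11 * 100

# AP40: indices 1 .. 40 -> 40 samples (recall 0.025 .. 1.0, skipping recall 0).
ap40 = sum(prec[i] for i in range(1, prec.shape[-1])) / 40 * 100

print(f'AP11={ap11:.2f}, AP40={ap40:.2f}')  # AP11=60.00, AP40=59.00
```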
  {
    "path": "mmdet3d/core/evaluation/kitti_utils/rotate_iou.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n#####################\n# Based on https://github.com/hongzhenwang/RRPN-revise\n# Licensed under The MIT License\n# Author: yanyan, scrin@foxmail.com\n#####################\nimport math\n\nimport numba\nimport numpy as np\nfrom numba import cuda\n\n\n@numba.jit(nopython=True)\ndef div_up(m, n):\n    return m // n + (m % n > 0)\n\n\n@cuda.jit(device=True, inline=True)\ndef trangle_area(a, b, c):\n    return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) *\n            (b[0] - c[0])) / 2.0\n\n\n@cuda.jit(device=True, inline=True)\ndef area(int_pts, num_of_inter):\n    area_val = 0.0\n    for i in range(num_of_inter - 2):\n        area_val += abs(\n            trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4],\n                         int_pts[2 * i + 4:2 * i + 6]))\n    return area_val\n\n\n@cuda.jit(device=True, inline=True)\ndef sort_vertex_in_convex_polygon(int_pts, num_of_inter):\n    if num_of_inter > 0:\n        center = cuda.local.array((2, ), dtype=numba.float32)\n        center[:] = 0.0\n        for i in range(num_of_inter):\n            center[0] += int_pts[2 * i]\n            center[1] += int_pts[2 * i + 1]\n        center[0] /= num_of_inter\n        center[1] /= num_of_inter\n        v = cuda.local.array((2, ), dtype=numba.float32)\n        vs = cuda.local.array((16, ), dtype=numba.float32)\n        for i in range(num_of_inter):\n            v[0] = int_pts[2 * i] - center[0]\n            v[1] = int_pts[2 * i + 1] - center[1]\n            d = math.sqrt(v[0] * v[0] + v[1] * v[1])\n            v[0] = v[0] / d\n            v[1] = v[1] / d\n            if v[1] < 0:\n                v[0] = -2 - v[0]\n            vs[i] = v[0]\n        j = 0\n        temp = 0\n        for i in range(1, num_of_inter):\n            if vs[i - 1] > vs[i]:\n                temp = vs[i]\n                tx = int_pts[2 * i]\n                ty = int_pts[2 * i + 1]\n                j = i\n                while j > 0 and vs[j - 1] > temp:\n                    vs[j] = vs[j - 1]\n                    int_pts[j * 2] = int_pts[j * 2 - 2]\n                    int_pts[j * 2 + 1] = int_pts[j * 2 - 1]\n                    j -= 1\n\n                vs[j] = temp\n                int_pts[j * 2] = tx\n                int_pts[j * 2 + 1] = ty\n\n\n@cuda.jit(device=True, inline=True)\ndef line_segment_intersection(pts1, pts2, i, j, temp_pts):\n    A = cuda.local.array((2, ), dtype=numba.float32)\n    B = cuda.local.array((2, ), dtype=numba.float32)\n    C = cuda.local.array((2, ), dtype=numba.float32)\n    D = cuda.local.array((2, ), dtype=numba.float32)\n\n    A[0] = pts1[2 * i]\n    A[1] = pts1[2 * i + 1]\n\n    B[0] = pts1[2 * ((i + 1) % 4)]\n    B[1] = pts1[2 * ((i + 1) % 4) + 1]\n\n    C[0] = pts2[2 * j]\n    C[1] = pts2[2 * j + 1]\n\n    D[0] = pts2[2 * ((j + 1) % 4)]\n    D[1] = pts2[2 * ((j + 1) % 4) + 1]\n    BA0 = B[0] - A[0]\n    BA1 = B[1] - A[1]\n    DA0 = D[0] - A[0]\n    CA0 = C[0] - A[0]\n    DA1 = D[1] - A[1]\n    CA1 = C[1] - A[1]\n    acd = DA1 * CA0 > CA1 * DA0\n    bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])\n    if acd != bcd:\n        abc = CA1 * BA0 > BA1 * CA0\n        abd = DA1 * BA0 > BA1 * DA0\n        if abc != abd:\n            DC0 = D[0] - C[0]\n            DC1 = D[1] - C[1]\n            ABBA = A[0] * B[1] - B[0] * A[1]\n            CDDC = C[0] * D[1] - D[0] * C[1]\n            DH = BA1 * DC0 - BA0 * DC1\n            Dx = ABBA * DC0 - BA0 * CDDC\n            Dy = ABBA * DC1 - BA1 * CDDC\n            
temp_pts[0] = Dx / DH\n            temp_pts[1] = Dy / DH\n            return True\n    return False\n\n\n@cuda.jit(device=True, inline=True)\ndef line_segment_intersection_v1(pts1, pts2, i, j, temp_pts):\n    a = cuda.local.array((2, ), dtype=numba.float32)\n    b = cuda.local.array((2, ), dtype=numba.float32)\n    c = cuda.local.array((2, ), dtype=numba.float32)\n    d = cuda.local.array((2, ), dtype=numba.float32)\n\n    a[0] = pts1[2 * i]\n    a[1] = pts1[2 * i + 1]\n\n    b[0] = pts1[2 * ((i + 1) % 4)]\n    b[1] = pts1[2 * ((i + 1) % 4) + 1]\n\n    c[0] = pts2[2 * j]\n    c[1] = pts2[2 * j + 1]\n\n    d[0] = pts2[2 * ((j + 1) % 4)]\n    d[1] = pts2[2 * ((j + 1) % 4) + 1]\n\n    area_abc = trangle_area(a, b, c)\n    area_abd = trangle_area(a, b, d)\n\n    if area_abc * area_abd >= 0:\n        return False\n\n    area_cda = trangle_area(c, d, a)\n    area_cdb = area_cda + area_abc - area_abd\n\n    if area_cda * area_cdb >= 0:\n        return False\n    t = area_cda / (area_abd - area_abc)\n\n    dx = t * (b[0] - a[0])\n    dy = t * (b[1] - a[1])\n    temp_pts[0] = a[0] + dx\n    temp_pts[1] = a[1] + dy\n    return True\n\n\n@cuda.jit(device=True, inline=True)\ndef point_in_quadrilateral(pt_x, pt_y, corners):\n    ab0 = corners[2] - corners[0]\n    ab1 = corners[3] - corners[1]\n\n    ad0 = corners[6] - corners[0]\n    ad1 = corners[7] - corners[1]\n\n    ap0 = pt_x - corners[0]\n    ap1 = pt_y - corners[1]\n\n    abab = ab0 * ab0 + ab1 * ab1\n    abap = ab0 * ap0 + ab1 * ap1\n    adad = ad0 * ad0 + ad1 * ad1\n    adap = ad0 * ap0 + ad1 * ap1\n\n    return abab >= abap and abap >= 0 and adad >= adap and adap >= 0\n\n\n@cuda.jit(device=True, inline=True)\ndef quadrilateral_intersection(pts1, pts2, int_pts):\n    num_of_inter = 0\n    for i in range(4):\n        if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2):\n            int_pts[num_of_inter * 2] = pts1[2 * i]\n            int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]\n            num_of_inter += 1\n        if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1):\n            int_pts[num_of_inter * 2] = pts2[2 * i]\n            int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]\n            num_of_inter += 1\n    temp_pts = cuda.local.array((2, ), dtype=numba.float32)\n    for i in range(4):\n        for j in range(4):\n            has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts)\n            if has_pts:\n                int_pts[num_of_inter * 2] = temp_pts[0]\n                int_pts[num_of_inter * 2 + 1] = temp_pts[1]\n                num_of_inter += 1\n\n    return num_of_inter\n\n\n@cuda.jit(device=True, inline=True)\ndef rbbox_to_corners(corners, rbbox):\n    # generate clockwise corners and rotate it clockwise\n    angle = rbbox[4]\n    a_cos = math.cos(angle)\n    a_sin = math.sin(angle)\n    center_x = rbbox[0]\n    center_y = rbbox[1]\n    x_d = rbbox[2]\n    y_d = rbbox[3]\n    corners_x = cuda.local.array((4, ), dtype=numba.float32)\n    corners_y = cuda.local.array((4, ), dtype=numba.float32)\n    corners_x[0] = -x_d / 2\n    corners_x[1] = -x_d / 2\n    corners_x[2] = x_d / 2\n    corners_x[3] = x_d / 2\n    corners_y[0] = -y_d / 2\n    corners_y[1] = y_d / 2\n    corners_y[2] = y_d / 2\n    corners_y[3] = -y_d / 2\n    for i in range(4):\n        corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x\n        corners[2 * i +\n                1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y\n\n\n@cuda.jit(device=True, inline=True)\ndef inter(rbbox1, rbbox2):\n    
\"\"\"Compute intersection of two rotated boxes.\n\n    Args:\n        rbox1 (np.ndarray, shape=[5]): Rotated 2d box.\n        rbox2 (np.ndarray, shape=[5]): Rotated 2d box.\n\n    Returns:\n        float: Intersection of two rotated boxes.\n    \"\"\"\n    corners1 = cuda.local.array((8, ), dtype=numba.float32)\n    corners2 = cuda.local.array((8, ), dtype=numba.float32)\n    intersection_corners = cuda.local.array((16, ), dtype=numba.float32)\n\n    rbbox_to_corners(corners1, rbbox1)\n    rbbox_to_corners(corners2, rbbox2)\n\n    num_intersection = quadrilateral_intersection(corners1, corners2,\n                                                  intersection_corners)\n    sort_vertex_in_convex_polygon(intersection_corners, num_intersection)\n    # print(intersection_corners.reshape([-1, 2])[:num_intersection])\n\n    return area(intersection_corners, num_intersection)\n\n\n@cuda.jit(device=True, inline=True)\ndef devRotateIoUEval(rbox1, rbox2, criterion=-1):\n    \"\"\"Compute rotated iou on device.\n\n    Args:\n        rbox1 (np.ndarray, shape=[5]): Rotated 2d box.\n        rbox2 (np.ndarray, shape=[5]): Rotated 2d box.\n        criterion (int, optional): Indicate different type of iou.\n            -1 indicate `area_inter / (area1 + area2 - area_inter)`,\n            0 indicate `area_inter / area1`,\n            1 indicate `area_inter / area2`.\n\n    Returns:\n        float: iou between two input boxes.\n    \"\"\"\n    area1 = rbox1[2] * rbox1[3]\n    area2 = rbox2[2] * rbox2[3]\n    area_inter = inter(rbox1, rbox2)\n    if criterion == -1:\n        return area_inter / (area1 + area2 - area_inter)\n    elif criterion == 0:\n        return area_inter / area1\n    elif criterion == 1:\n        return area_inter / area2\n    else:\n        return area_inter\n\n\n@cuda.jit(\n    '(int64, int64, float32[:], float32[:], float32[:], int32)',\n    fastmath=False)\ndef rotate_iou_kernel_eval(N,\n                           K,\n                           dev_boxes,\n                           dev_query_boxes,\n                           dev_iou,\n                           criterion=-1):\n    \"\"\"Kernel of computing rotated IoU. 
This function is for bev boxes in\n    camera coordinate system ONLY (the rotation is clockwise).\n\n    Args:\n        N (int): The number of boxes.\n        K (int): The number of query boxes.\n        dev_boxes (np.ndarray): Boxes on device.\n        dev_query_boxes (np.ndarray): Query boxes on device.\n        dev_iou (np.ndarray): Computed iou to return.\n        criterion (int, optional): Indicate different type of iou.\n            -1 indicate `area_inter / (area1 + area2 - area_inter)`,\n            0 indicate `area_inter / area1`,\n            1 indicate `area_inter / area2`.\n    \"\"\"\n    threadsPerBlock = 8 * 8\n    row_start = cuda.blockIdx.x\n    col_start = cuda.blockIdx.y\n    tx = cuda.threadIdx.x\n    row_size = min(N - row_start * threadsPerBlock, threadsPerBlock)\n    col_size = min(K - col_start * threadsPerBlock, threadsPerBlock)\n    block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)\n    block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)\n\n    dev_query_box_idx = threadsPerBlock * col_start + tx\n    dev_box_idx = threadsPerBlock * row_start + tx\n    if (tx < col_size):\n        block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0]\n        block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1]\n        block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2]\n        block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3]\n        block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4]\n    if (tx < row_size):\n        block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0]\n        block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1]\n        block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2]\n        block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3]\n        block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4]\n    cuda.syncthreads()\n    if tx < row_size:\n        for i in range(col_size):\n            offset = (\n                row_start * threadsPerBlock * K + col_start * threadsPerBlock +\n                tx * K + i)\n            dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5],\n                                               block_boxes[tx * 5:tx * 5 + 5],\n                                               criterion)\n\n\ndef rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):\n    \"\"\"Rotated box iou running in gpu. 500x faster than cpu version (take 5ms\n    in one example with numba.cuda code). convert from [this project](\n    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).\n\n    This function is for bev boxes in camera coordinate system ONLY\n    (the rotation is clockwise).\n\n    Args:\n        boxes (torch.Tensor): rbboxes. format: centers, dims,\n            angles(clockwise when positive) with the shape of [N, 5].\n        query_boxes (torch.FloatTensor, shape=(K, 5)):\n            rbboxes to compute iou with boxes.\n        device_id (int, optional): Defaults to 0. 
Device to use.\n        criterion (int, optional): Indicate different type of iou.\n            -1 indicate `area_inter / (area1 + area2 - area_inter)`,\n            0 indicate `area_inter / area1`,\n            1 indicate `area_inter / area2`.\n\n    Returns:\n        np.ndarray: IoU results.\n    \"\"\"\n    boxes = boxes.astype(np.float32)\n    query_boxes = query_boxes.astype(np.float32)\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    iou = np.zeros((N, K), dtype=np.float32)\n    if N == 0 or K == 0:\n        return iou\n    threadsPerBlock = 8 * 8\n    cuda.select_device(device_id)\n    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))\n\n    stream = cuda.stream()\n    with stream.auto_synchronize():\n        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)\n        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)\n        iou_dev = cuda.to_device(iou.reshape([-1]), stream)\n        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock,\n                               stream](N, K, boxes_dev, query_boxes_dev,\n                                       iou_dev, criterion)\n        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)\n    return iou.astype(boxes.dtype)\n"
  },
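A minimal usage sketch for `rotate_iou_gpu_eval`, assuming a CUDA-capable GPU with numba installed and that the module lives at `mmdet3d.core.evaluation.kitti_utils.rotate_iou` as in upstream mmdet3d. Despite the `torch.Tensor` hint in the docstring, the function converts its inputs with `.astype`, so NumPy arrays of shape (N, 5) and (K, 5) are what it actually consumes:

```python
import numpy as np

# Assumed module path; adjust to wherever this file sits in the repo.
from mmdet3d.core.evaluation.kitti_utils.rotate_iou import rotate_iou_gpu_eval

# BEV boxes in camera coordinates: (center_x, center_y, w, h, clockwise angle).
boxes = np.array([[0.0, 0.0, 2.0, 4.0, 0.0]], dtype=np.float32)        # (N, 5)
query_boxes = np.array([[0.5, 0.0, 2.0, 4.0, 0.0]], dtype=np.float32)  # (K, 5)

ious = rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1)  # (N, K) IoU matrix
print(ious)
```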
  {
    "path": "mmdet3d/core/evaluation/lyft_eval.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom lyft_dataset_sdk.eval.detection.mAP_evaluation import (Box3D, get_ap,\n                                                            get_class_names,\n                                                            get_ious,\n                                                            group_by_key,\n                                                            wrap_in_box)\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\n\ndef load_lyft_gts(lyft, data_root, eval_split, logger=None):\n    \"\"\"Loads ground truth boxes from database.\n\n    Args:\n        lyft (:obj:`LyftDataset`): Lyft class in the sdk.\n        data_root (str): Root of data for reading splits.\n        eval_split (str): Name of the split for evaluation.\n        logger (logging.Logger | str, optional): Logger used for printing\n        related information during evaluation. Default: None.\n\n    Returns:\n        list[dict]: List of annotation dictionaries.\n    \"\"\"\n    split_scenes = mmcv.list_from_file(\n        osp.join(data_root, f'{eval_split}.txt'))\n\n    # Read out all sample_tokens in DB.\n    sample_tokens_all = [s['token'] for s in lyft.sample]\n    assert len(sample_tokens_all) > 0, 'Error: Database has no samples!'\n\n    if eval_split == 'test':\n        # Check that you aren't trying to cheat :)\n        assert len(lyft.sample_annotation) > 0, \\\n            'Error: You are trying to evaluate on the test set \\\n             but you do not have the annotations!'\n\n    sample_tokens = []\n    for sample_token in sample_tokens_all:\n        scene_token = lyft.get('sample', sample_token)['scene_token']\n        scene_record = lyft.get('scene', scene_token)\n        if scene_record['name'] in split_scenes:\n            sample_tokens.append(sample_token)\n\n    all_annotations = []\n\n    print_log('Loading ground truth annotations...', logger=logger)\n    # Load annotations and filter predictions and annotations.\n    for sample_token in mmcv.track_iter_progress(sample_tokens):\n        sample = lyft.get('sample', sample_token)\n        sample_annotation_tokens = sample['anns']\n        for sample_annotation_token in sample_annotation_tokens:\n            # Get label name in detection task and filter unused labels.\n            sample_annotation = \\\n                lyft.get('sample_annotation', sample_annotation_token)\n            detection_name = sample_annotation['category_name']\n            if detection_name is None:\n                continue\n            annotation = {\n                'sample_token': sample_token,\n                'translation': sample_annotation['translation'],\n                'size': sample_annotation['size'],\n                'rotation': sample_annotation['rotation'],\n                'name': detection_name,\n            }\n            all_annotations.append(annotation)\n\n    return all_annotations\n\n\ndef load_lyft_predictions(res_path):\n    \"\"\"Load Lyft predictions from json file.\n\n    Args:\n        res_path (str): Path of result json file recording detections.\n\n    Returns:\n        list[dict]: List of prediction dictionaries.\n    \"\"\"\n    predictions = mmcv.load(res_path)\n    predictions = predictions['results']\n    all_preds = []\n    for sample_token in predictions.keys():\n        all_preds.extend(predictions[sample_token])\n    return all_preds\n\n\ndef lyft_eval(lyft, data_root, res_path, eval_set, 
output_dir, logger=None):\n    \"\"\"Evaluation API for Lyft dataset.\n\n    Args:\n        lyft (:obj:`LyftDataset`): Lyft class in the sdk.\n        data_root (str): Root of data for reading splits.\n        res_path (str): Path of result json file recording detections.\n        eval_set (str): Name of the split for evaluation.\n        output_dir (str): Output directory for output json files.\n        logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n\n    Returns:\n        dict[str, float]: The evaluation results.\n    \"\"\"\n    # evaluate by lyft metrics\n    gts = load_lyft_gts(lyft, data_root, eval_set, logger)\n    predictions = load_lyft_predictions(res_path)\n\n    class_names = get_class_names(gts)\n    print('Calculating mAP@0.5:0.95...')\n\n    iou_thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]\n    metrics = {}\n    average_precisions = \\\n        get_classwise_aps(gts, predictions, class_names, iou_thresholds)\n    APs_data = [['IOU', 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]]\n\n    mAPs = np.mean(average_precisions, axis=0)\n    mAPs_cate = np.mean(average_precisions, axis=1)\n    final_mAP = np.mean(mAPs)\n\n    metrics['average_precisions'] = average_precisions.tolist()\n    metrics['mAPs'] = mAPs.tolist()\n    metrics['Final mAP'] = float(final_mAP)\n    metrics['class_names'] = class_names\n    metrics['mAPs_cate'] = mAPs_cate.tolist()\n\n    APs_data = [['class', 'mAP@0.5:0.95']]\n    for i in range(len(class_names)):\n        row = [class_names[i], round(mAPs_cate[i], 3)]\n        APs_data.append(row)\n    APs_data.append(['Overall', round(final_mAP, 3)])\n    APs_table = AsciiTable(APs_data, title='mAPs@0.5:0.95')\n    APs_table.inner_footing_row_border = True\n    print_log(APs_table.table, logger=logger)\n\n    res_path = osp.join(output_dir, 'lyft_metrics.json')\n    mmcv.dump(metrics, res_path)\n    return metrics\n\n\ndef get_classwise_aps(gt, predictions, class_names, iou_thresholds):\n    \"\"\"Returns an array with an average precision per class.\n\n    Note: Ground truth and predictions should have the following format.\n\n    .. 
code-block::\n\n    gt = [{\n        'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207\n                         fbb039a550991a5149214f98cec136ac',\n        'translation': [974.2811881299899, 1714.6815014457964,\n                        -23.689857123368846],\n        'size': [1.796, 4.488, 1.664],\n        'rotation': [0.14882026466054782, 0, 0, 0.9888642620837121],\n        'name': 'car'\n    }]\n\n    predictions = [{\n        'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207\n                         fbb039a550991a5149214f98cec136ac',\n        'translation': [971.8343488872263, 1713.6816097857359,\n                        -25.82534357061308],\n        'size': [2.519726579986132, 7.810161372666739, 3.483438286096803],\n        'rotation': [0.10913582721095375, 0.04099572636992043,\n                     0.01927712319721745, 1.029328402625659],\n        'name': 'car',\n        'score': 0.3077029437237213\n    }]\n\n    Args:\n        gt (list[dict]): list of dictionaries in the format described below.\n        predictions (list[dict]): list of dictionaries in the format\n            described below.\n        class_names (list[str]): list of the class names.\n        iou_thresholds (list[float]): IOU thresholds used to calculate\n            TP / FN\n\n    Returns:\n        np.ndarray: an array with an average precision per class.\n    \"\"\"\n    assert all([0 <= iou_th <= 1 for iou_th in iou_thresholds])\n\n    gt_by_class_name = group_by_key(gt, 'name')\n    pred_by_class_name = group_by_key(predictions, 'name')\n\n    average_precisions = np.zeros((len(class_names), len(iou_thresholds)))\n\n    for class_id, class_name in enumerate(class_names):\n        if class_name in pred_by_class_name:\n            recalls, precisions, average_precision = get_single_class_aps(\n                gt_by_class_name[class_name], pred_by_class_name[class_name],\n                iou_thresholds)\n            average_precisions[class_id, :] = average_precision\n\n    return average_precisions\n\n\ndef get_single_class_aps(gt, predictions, iou_thresholds):\n    \"\"\"Compute recall and precision for all iou thresholds. 
Adapted from\n    LyftDatasetDevkit.\n\n    Args:\n        gt (list[dict]): list of dictionaries in the format described above.\n        predictions (list[dict]): list of dictionaries in the format\n            described below.\n        iou_thresholds (list[float]): IOU thresholds used to calculate\n            TP / FN\n\n    Returns:\n        tuple[np.ndarray]: Returns (recalls, precisions, average precisions)\n            for each class.\n    \"\"\"\n    num_gts = len(gt)\n    image_gts = group_by_key(gt, 'sample_token')\n    image_gts = wrap_in_box(image_gts)\n\n    sample_gt_checked = {\n        sample_token: np.zeros((len(boxes), len(iou_thresholds)))\n        for sample_token, boxes in image_gts.items()\n    }\n\n    predictions = sorted(predictions, key=lambda x: x['score'], reverse=True)\n\n    # go down dets and mark TPs and FPs\n    num_predictions = len(predictions)\n    tps = np.zeros((num_predictions, len(iou_thresholds)))\n    fps = np.zeros((num_predictions, len(iou_thresholds)))\n\n    for prediction_index, prediction in enumerate(predictions):\n        predicted_box = Box3D(**prediction)\n\n        sample_token = prediction['sample_token']\n\n        max_overlap = -np.inf\n        jmax = -1\n\n        if sample_token in image_gts:\n            gt_boxes = image_gts[sample_token]\n            # gt_boxes per sample\n            gt_checked = sample_gt_checked[sample_token]\n            # gt flags per sample\n        else:\n            gt_boxes = []\n            gt_checked = None\n\n        if len(gt_boxes) > 0:\n            overlaps = get_ious(gt_boxes, predicted_box)\n\n            max_overlap = np.max(overlaps)\n\n            jmax = np.argmax(overlaps)\n\n        for i, iou_threshold in enumerate(iou_thresholds):\n            if max_overlap > iou_threshold:\n                if gt_checked[jmax, i] == 0:\n                    tps[prediction_index, i] = 1.0\n                    gt_checked[jmax, i] = 1\n                else:\n                    fps[prediction_index, i] = 1.0\n            else:\n                fps[prediction_index, i] = 1.0\n\n    # compute precision recall\n    fps = np.cumsum(fps, axis=0)\n    tps = np.cumsum(tps, axis=0)\n\n    recalls = tps / float(num_gts)\n    # avoid divide by zero in case the first detection\n    # matches a difficult ground truth\n    precisions = tps / np.maximum(tps + fps, np.finfo(np.float64).eps)\n\n    aps = []\n    for i in range(len(iou_thresholds)):\n        recall = recalls[:, i]\n        precision = precisions[:, i]\n        assert np.all(0 <= recall) & np.all(recall <= 1)\n        assert np.all(0 <= precision) & np.all(precision <= 1)\n        ap = get_ap(recall, precision)\n        aps.append(ap)\n\n    aps = np.array(aps)\n\n    return recalls, precisions, aps\n"
  },
  {
    "path": "mmdet3d/core/evaluation/scannet_utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .evaluate_semantic_instance import evaluate_matches, scannet_eval\n\n__all__ = ['scannet_eval', 'evaluate_matches']\n"
  },
  {
    "path": "mmdet3d/core/evaluation/scannet_utils/evaluate_semantic_instance.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/3d_evaluation/evaluate_semantic_instance.py # noqa\nfrom copy import deepcopy\n\nimport numpy as np\n\nfrom . import util_3d\n\n\ndef evaluate_matches(matches, class_labels, options):\n    \"\"\"Evaluate instance segmentation from matched gt and predicted instances\n    for all scenes.\n\n    Args:\n        matches (dict): Contains gt2pred and pred2gt infos for every scene.\n        class_labels (tuple[str]): Class names.\n        options (dict): ScanNet evaluator options. See get_options.\n\n    Returns:\n        np.array: Average precision scores for all thresholds and categories.\n    \"\"\"\n    overlaps = options['overlaps']\n    min_region_sizes = [options['min_region_sizes'][0]]\n    dist_threshes = [options['distance_threshes'][0]]\n    dist_confs = [options['distance_confs'][0]]\n\n    # results: class x overlap\n    ap = np.zeros((len(dist_threshes), len(class_labels), len(overlaps)),\n                  np.float)\n    for di, (min_region_size, distance_thresh, distance_conf) in enumerate(\n            zip(min_region_sizes, dist_threshes, dist_confs)):\n        for oi, overlap_th in enumerate(overlaps):\n            pred_visited = {}\n            for m in matches:\n                for label_name in class_labels:\n                    for p in matches[m]['pred'][label_name]:\n                        if 'filename' in p:\n                            pred_visited[p['filename']] = False\n            for li, label_name in enumerate(class_labels):\n                y_true = np.empty(0)\n                y_score = np.empty(0)\n                hard_false_negatives = 0\n                has_gt = False\n                has_pred = False\n                for m in matches:\n                    pred_instances = matches[m]['pred'][label_name]\n                    gt_instances = matches[m]['gt'][label_name]\n                    # filter groups in ground truth\n                    gt_instances = [\n                        gt for gt in gt_instances\n                        if gt['instance_id'] >= 1000 and gt['vert_count'] >=\n                        min_region_size and gt['med_dist'] <= distance_thresh\n                        and gt['dist_conf'] >= distance_conf\n                    ]\n                    if gt_instances:\n                        has_gt = True\n                    if pred_instances:\n                        has_pred = True\n\n                    cur_true = np.ones(len(gt_instances))\n                    cur_score = np.ones(len(gt_instances)) * (-float('inf'))\n                    cur_match = np.zeros(len(gt_instances), dtype=np.bool)\n                    # collect matches\n                    for (gti, gt) in enumerate(gt_instances):\n                        found_match = False\n                        for pred in gt['matched_pred']:\n                            # greedy assignments\n                            if pred_visited[pred['filename']]:\n                                continue\n                            overlap = float(pred['intersection']) / (\n                                gt['vert_count'] + pred['vert_count'] -\n                                pred['intersection'])\n                            if overlap > overlap_th:\n                                confidence = pred['confidence']\n                                # if already have a prediction for this gt,\n                                # the prediction with the lower score 
is automatically a false positive # noqa\n                                if cur_match[gti]:\n                                    max_score = max(cur_score[gti], confidence)\n                                    min_score = min(cur_score[gti], confidence)\n                                    cur_score[gti] = max_score\n                                    # append false positive\n                                    cur_true = np.append(cur_true, 0)\n                                    cur_score = np.append(cur_score, min_score)\n                                    cur_match = np.append(cur_match, True)\n                                # otherwise set score\n                                else:\n                                    found_match = True\n                                    cur_match[gti] = True\n                                    cur_score[gti] = confidence\n                                    pred_visited[pred['filename']] = True\n                        if not found_match:\n                            hard_false_negatives += 1\n                    # remove non-matched ground truth instances\n                    cur_true = cur_true[cur_match]\n                    cur_score = cur_score[cur_match]\n\n                    # collect non-matched predictions as false positive\n                    for pred in pred_instances:\n                        found_gt = False\n                        for gt in pred['matched_gt']:\n                            overlap = float(gt['intersection']) / (\n                                gt['vert_count'] + pred['vert_count'] -\n                                gt['intersection'])\n                            if overlap > overlap_th:\n                                found_gt = True\n                                break\n                        if not found_gt:\n                            num_ignore = pred['void_intersection']\n                            for gt in pred['matched_gt']:\n                                # group?\n                                if gt['instance_id'] < 1000:\n                                    num_ignore += gt['intersection']\n                                # small ground truth instances\n                                if gt['vert_count'] < min_region_size or gt[\n                                        'med_dist'] > distance_thresh or gt[\n                                            'dist_conf'] < distance_conf:\n                                    num_ignore += gt['intersection']\n                            proportion_ignore = float(\n                                num_ignore) / pred['vert_count']\n                            # if not ignored append false positive\n                            if proportion_ignore <= overlap_th:\n                                cur_true = np.append(cur_true, 0)\n                                confidence = pred['confidence']\n                                cur_score = np.append(cur_score, confidence)\n\n                    # append to overall results\n                    y_true = np.append(y_true, cur_true)\n                    y_score = np.append(y_score, cur_score)\n\n                # compute average precision\n                if has_gt and has_pred:\n                    # compute precision recall curve first\n\n                    # sorting and cumsum\n                    score_arg_sort = np.argsort(y_score)\n                    y_score_sorted = y_score[score_arg_sort]\n                    y_true_sorted = y_true[score_arg_sort]\n                    y_true_sorted_cumsum = 
np.cumsum(y_true_sorted)\n\n                    # unique thresholds\n                    (thresholds, unique_indices) = np.unique(\n                        y_score_sorted, return_index=True)\n                    num_prec_recall = len(unique_indices) + 1\n\n                    # prepare precision recall\n                    num_examples = len(y_score_sorted)\n                    # follow https://github.com/ScanNet/ScanNet/pull/26 ? # noqa\n                    num_true_examples = y_true_sorted_cumsum[-1] if len(\n                        y_true_sorted_cumsum) > 0 else 0\n                    precision = np.zeros(num_prec_recall)\n                    recall = np.zeros(num_prec_recall)\n\n                    # deal with the first point\n                    y_true_sorted_cumsum = np.append(y_true_sorted_cumsum, 0)\n                    # deal with remaining\n                    for idx_res, idx_scores in enumerate(unique_indices):\n                        cumsum = y_true_sorted_cumsum[idx_scores - 1]\n                        tp = num_true_examples - cumsum\n                        fp = num_examples - idx_scores - tp\n                        fn = cumsum + hard_false_negatives\n                        p = float(tp) / (tp + fp)\n                        r = float(tp) / (tp + fn)\n                        precision[idx_res] = p\n                        recall[idx_res] = r\n\n                    # first point in curve is artificial\n                    precision[-1] = 1.\n                    recall[-1] = 0.\n\n                    # compute average of precision-recall curve\n                    recall_for_conv = np.copy(recall)\n                    recall_for_conv = np.append(recall_for_conv[0],\n                                                recall_for_conv)\n                    recall_for_conv = np.append(recall_for_conv, 0.)\n\n                    stepWidths = np.convolve(recall_for_conv, [-0.5, 0, 0.5],\n                                             'valid')\n                    # integrate is now simply a dot product\n                    ap_current = np.dot(precision, stepWidths)\n\n                elif has_gt:\n                    ap_current = 0.0\n                else:\n                    ap_current = float('nan')\n                ap[di, li, oi] = ap_current\n    return ap\n\n\ndef compute_averages(aps, options, class_labels):\n    \"\"\"Averages AP scores for all categories.\n\n    Args:\n        aps (np.array): AP scores for all thresholds and categories.\n        options (dict): ScanNet evaluator options. 
See get_options.\n        class_labels (tuple[str]): Class names.\n\n    Returns:\n        dict: Overall and per-category AP scores.\n    \"\"\"\n    d_inf = 0\n    o50 = np.where(np.isclose(options['overlaps'], 0.5))\n    o25 = np.where(np.isclose(options['overlaps'], 0.25))\n    o_all_but25 = np.where(\n        np.logical_not(np.isclose(options['overlaps'], 0.25)))\n    avg_dict = {}\n    avg_dict['all_ap'] = np.nanmean(aps[d_inf, :, o_all_but25])\n    avg_dict['all_ap_50%'] = np.nanmean(aps[d_inf, :, o50])\n    avg_dict['all_ap_25%'] = np.nanmean(aps[d_inf, :, o25])\n    avg_dict['classes'] = {}\n    for (li, label_name) in enumerate(class_labels):\n        avg_dict['classes'][label_name] = {}\n        avg_dict['classes'][label_name]['ap'] = np.average(aps[d_inf, li,\n                                                               o_all_but25])\n        avg_dict['classes'][label_name]['ap50%'] = np.average(aps[d_inf, li,\n                                                                  o50])\n        avg_dict['classes'][label_name]['ap25%'] = np.average(aps[d_inf, li,\n                                                                  o25])\n    return avg_dict\n\n\ndef assign_instances_for_scan(pred_info, gt_ids, options, valid_class_ids,\n                              class_labels, id_to_label):\n    \"\"\"Assign gt and predicted instances for a single scene.\n\n    Args:\n        pred_info (dict): Predicted masks, labels and scores.\n        gt_ids (np.array): Ground truth instance masks.\n        options (dict): ScanNet evaluator options. See get_options.\n        valid_class_ids (tuple[int]): Ids of valid categories.\n        class_labels (tuple[str]): Class names.\n        id_to_label (dict[int, str]): Mapping of valid class id to class label.\n\n    Returns:\n        dict: Per class assigned gt to predicted instances.\n        dict: Per class assigned predicted to gt instances.\n    \"\"\"\n    # get gt instances\n    gt_instances = util_3d.get_instances(gt_ids, valid_class_ids, class_labels,\n                                         id_to_label)\n    # associate\n    gt2pred = deepcopy(gt_instances)\n    for label in gt2pred:\n        for gt in gt2pred[label]:\n            gt['matched_pred'] = []\n    pred2gt = {}\n    for label in class_labels:\n        pred2gt[label] = []\n    num_pred_instances = 0\n    # mask of void labels in the ground truth\n    bool_void = np.logical_not(np.in1d(gt_ids // 1000, valid_class_ids))\n    # go through all prediction masks\n    for pred_mask_file in pred_info:\n        label_id = int(pred_info[pred_mask_file]['label_id'])\n        conf = pred_info[pred_mask_file]['conf']\n        if not label_id in id_to_label:  # noqa E713\n            continue\n        label_name = id_to_label[label_id]\n        # read the mask\n        pred_mask = pred_info[pred_mask_file]['mask']\n        if len(pred_mask) != len(gt_ids):\n            raise ValueError('len(pred_mask) != len(gt_ids)')\n        # convert to binary\n        pred_mask = np.not_equal(pred_mask, 0)\n        num = np.count_nonzero(pred_mask)\n        if num < options['min_region_sizes'][0]:\n            continue  # skip if empty\n\n        pred_instance = {}\n        pred_instance['filename'] = pred_mask_file\n        pred_instance['pred_id'] = num_pred_instances\n        pred_instance['label_id'] = label_id\n        pred_instance['vert_count'] = num\n        pred_instance['confidence'] = conf\n        pred_instance['void_intersection'] = np.count_nonzero(\n            np.logical_and(bool_void, 
pred_mask))\n\n        # matched gt instances\n        matched_gt = []\n        # go through all gt instances with matching label\n        for (gt_num, gt_inst) in enumerate(gt2pred[label_name]):\n            intersection = np.count_nonzero(\n                np.logical_and(gt_ids == gt_inst['instance_id'], pred_mask))\n            if intersection > 0:\n                gt_copy = gt_inst.copy()\n                pred_copy = pred_instance.copy()\n                gt_copy['intersection'] = intersection\n                pred_copy['intersection'] = intersection\n                matched_gt.append(gt_copy)\n                gt2pred[label_name][gt_num]['matched_pred'].append(pred_copy)\n        pred_instance['matched_gt'] = matched_gt\n        num_pred_instances += 1\n        pred2gt[label_name].append(pred_instance)\n\n    return gt2pred, pred2gt\n\n\ndef scannet_eval(preds, gts, options, valid_class_ids, class_labels,\n                 id_to_label):\n    \"\"\"Evaluate instance segmentation in ScanNet protocol.\n\n    Args:\n        preds (list[dict]): Per scene predictions of mask, label and\n            confidence.\n        gts (list[np.array]): Per scene ground truth instance masks.\n        options (dict): ScanNet evaluator options. See get_options.\n        valid_class_ids (tuple[int]): Ids of valid categories.\n        class_labels (tuple[str]): Class names.\n        id_to_label (dict[int, str]): Mapping of valid class id to class label.\n\n    Returns:\n        dict: Overall and per-category AP scores.\n    \"\"\"\n    options = get_options(options)\n    matches = {}\n    for i, (pred, gt) in enumerate(zip(preds, gts)):\n        matches_key = i\n        # assign gt to predictions\n        gt2pred, pred2gt = assign_instances_for_scan(pred, gt, options,\n                                                     valid_class_ids,\n                                                     class_labels, id_to_label)\n        matches[matches_key] = {}\n        matches[matches_key]['gt'] = gt2pred\n        matches[matches_key]['pred'] = pred2gt\n\n    ap_scores = evaluate_matches(matches, class_labels, options)\n    avgs = compute_averages(ap_scores, options, class_labels)\n    return avgs\n\n\ndef get_options(options=None):\n    \"\"\"Set ScanNet evaluator options.\n\n    Args:\n        options (dict, optional): Not default options. Default: None.\n\n    Returns:\n        dict: Updated options with all 4 keys.\n    \"\"\"\n    assert options is None or isinstance(options, dict)\n    _options = dict(\n        overlaps=np.append(np.arange(0.5, 0.95, 0.05), 0.25),\n        min_region_sizes=np.array([100]),\n        distance_threshes=np.array([float('inf')]),\n        distance_confs=np.array([-float('inf')]))\n    if options is not None:\n        _options.update(options)\n    return _options\n"
  },
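For reference, the evaluator's default thresholds can be inspected directly through `get_options`; a small sketch, assuming the module path shown above:

```python
from mmdet3d.core.evaluation.scannet_utils.evaluate_semantic_instance import \
    get_options

options = get_options()
# overlaps: 0.50, 0.55, ..., 0.90 plus 0.25; regions below 100 vertices are ignored
print(options['overlaps'])
print(options['min_region_sizes'])
```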
  {
    "path": "mmdet3d/core/evaluation/scannet_utils/util_3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# adapted from https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts/util_3d.py # noqa\nimport json\n\nimport numpy as np\n\n\nclass Instance:\n    \"\"\"Single instance for ScanNet evaluator.\n\n    Args:\n        mesh_vert_instances (np.array): Instance ids for each point.\n        instance_id: Id of single instance.\n    \"\"\"\n    instance_id = 0\n    label_id = 0\n    vert_count = 0\n    med_dist = -1\n    dist_conf = 0.0\n\n    def __init__(self, mesh_vert_instances, instance_id):\n        if instance_id == -1:\n            return\n        self.instance_id = int(instance_id)\n        self.label_id = int(self.get_label_id(instance_id))\n        self.vert_count = int(\n            self.get_instance_verts(mesh_vert_instances, instance_id))\n\n    @staticmethod\n    def get_label_id(instance_id):\n        return int(instance_id // 1000)\n\n    @staticmethod\n    def get_instance_verts(mesh_vert_instances, instance_id):\n        return (mesh_vert_instances == instance_id).sum()\n\n    def to_json(self):\n        return json.dumps(\n            self, default=lambda o: o.__dict__, sort_keys=True, indent=4)\n\n    def to_dict(self):\n        dict = {}\n        dict['instance_id'] = self.instance_id\n        dict['label_id'] = self.label_id\n        dict['vert_count'] = self.vert_count\n        dict['med_dist'] = self.med_dist\n        dict['dist_conf'] = self.dist_conf\n        return dict\n\n    def from_json(self, data):\n        self.instance_id = int(data['instance_id'])\n        self.label_id = int(data['label_id'])\n        self.vert_count = int(data['vert_count'])\n        if 'med_dist' in data:\n            self.med_dist = float(data['med_dist'])\n            self.dist_conf = float(data['dist_conf'])\n\n    def __str__(self):\n        return '(' + str(self.instance_id) + ')'\n\n\ndef get_instances(ids, class_ids, class_labels, id2label):\n    \"\"\"Transform gt instance mask to Instance objects.\n\n    Args:\n        ids (np.array): Instance ids for each point.\n        class_ids: (tuple[int]): Ids of valid categories.\n        class_labels (tuple[str]): Class names.\n        id2label: (dict[int, str]): Mapping of valid class id to class label.\n\n    Returns:\n        dict [str, list]: Instance objects grouped by class label.\n    \"\"\"\n    instances = {}\n    for label in class_labels:\n        instances[label] = []\n    instance_ids = np.unique(ids)\n    for id in instance_ids:\n        if id == 0:\n            continue\n        inst = Instance(ids, id)\n        if inst.label_id in class_ids:\n            instances[id2label[inst.label_id]].append(inst.to_dict())\n    return instances\n"
  },
  {
    "path": "mmdet3d/core/evaluation/seg_eval.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\n\ndef fast_hist(preds, labels, num_classes):\n    \"\"\"Compute the confusion matrix for every batch.\n\n    Args:\n        preds (np.ndarray):  Prediction labels of points with shape of\n        (num_points, ).\n        labels (np.ndarray): Ground truth labels of points with shape of\n        (num_points, ).\n        num_classes (int): number of classes\n\n    Returns:\n        np.ndarray: Calculated confusion matrix.\n    \"\"\"\n\n    k = (labels >= 0) & (labels < num_classes)\n    bin_count = np.bincount(\n        num_classes * labels[k].astype(int) + preds[k],\n        minlength=num_classes**2)\n    return bin_count[:num_classes**2].reshape(num_classes, num_classes)\n\n\ndef per_class_iou(hist):\n    \"\"\"Compute the per class iou.\n\n    Args:\n        hist(np.ndarray):  Overall confusion martix\n        (num_classes, num_classes ).\n\n    Returns:\n        np.ndarray: Calculated per class iou\n    \"\"\"\n\n    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))\n\n\ndef get_acc(hist):\n    \"\"\"Compute the overall accuracy.\n\n    Args:\n        hist(np.ndarray):  Overall confusion martix\n        (num_classes, num_classes ).\n\n    Returns:\n        float: Calculated overall acc\n    \"\"\"\n\n    return np.diag(hist).sum() / hist.sum()\n\n\ndef get_acc_cls(hist):\n    \"\"\"Compute the class average accuracy.\n\n    Args:\n        hist(np.ndarray):  Overall confusion martix\n        (num_classes, num_classes ).\n\n    Returns:\n        float: Calculated class average acc\n    \"\"\"\n\n    return np.nanmean(np.diag(hist) / hist.sum(axis=1))\n\n\ndef seg_eval(gt_labels, seg_preds, label2cat, ignore_index, logger=None):\n    \"\"\"Semantic Segmentation  Evaluation.\n\n    Evaluate the result of the Semantic Segmentation.\n\n    Args:\n        gt_labels (list[torch.Tensor]): Ground truth labels.\n        seg_preds  (list[torch.Tensor]): Predictions.\n        label2cat (dict): Map from label to category name.\n        ignore_index (int): Index that will be ignored in evaluation.\n        logger (logging.Logger | str, optional): The way to print the mAP\n            summary. See `mmdet.utils.print_log()` for details. 
Default: None.\n\n    Returns:\n        dict[str, float]: Dict of results.\n    \"\"\"\n    assert len(seg_preds) == len(gt_labels)\n    num_classes = len(label2cat)\n\n    hist_list = []\n    for i in range(len(gt_labels)):\n        gt_seg = gt_labels[i].clone().numpy().astype(np.int)\n        pred_seg = seg_preds[i].clone().numpy().astype(np.int)\n\n        # filter out ignored points\n        pred_seg[gt_seg == ignore_index] = -1\n        gt_seg[gt_seg == ignore_index] = -1\n\n        # calculate one instance result\n        hist_list.append(fast_hist(pred_seg, gt_seg, num_classes))\n\n    iou = per_class_iou(sum(hist_list))\n    miou = np.nanmean(iou)\n    acc = get_acc(sum(hist_list))\n    acc_cls = get_acc_cls(sum(hist_list))\n\n    header = ['classes']\n    for i in range(len(label2cat)):\n        header.append(label2cat[i])\n    header.extend(['miou', 'acc', 'acc_cls'])\n\n    ret_dict = dict()\n    table_columns = [['results']]\n    for i in range(len(label2cat)):\n        ret_dict[label2cat[i]] = float(iou[i])\n        table_columns.append([f'{iou[i]:.4f}'])\n    ret_dict['miou'] = float(miou)\n    ret_dict['acc'] = float(acc)\n    ret_dict['acc_cls'] = float(acc_cls)\n\n    table_columns.append([f'{miou:.4f}'])\n    table_columns.append([f'{acc:.4f}'])\n    table_columns.append([f'{acc_cls:.4f}'])\n\n    table_data = [header]\n    table_rows = list(zip(*table_columns))\n    table_data += table_rows\n    table = AsciiTable(table_data)\n    table.inner_footing_row_border = True\n    print_log('\\n' + table.table, logger=logger)\n\n    return ret_dict\n"
  },
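A minimal sketch of calling `seg_eval` on two tiny point clouds (it assumes the NumPy version pinned by this repo, since the module still uses the `np.int` alias); points labelled with `ignore_index` are dropped from the confusion matrix:

```python
import torch

from mmdet3d.core.evaluation.seg_eval import seg_eval

gt_labels = [torch.tensor([0, 0, 1, 255]), torch.tensor([1, 1, 0, 0])]
seg_preds = [torch.tensor([0, 1, 1, 0]), torch.tensor([1, 0, 0, 0])]
label2cat = {0: 'ground', 1: 'vegetation'}

results = seg_eval(gt_labels, seg_preds, label2cat, ignore_index=255)
print(results)  # per-class IoU plus 'miou', 'acc' and 'acc_cls'
```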
  {
    "path": "mmdet3d/core/evaluation/waymo_utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .prediction_kitti_to_waymo import KITTI2Waymo\n\n__all__ = ['KITTI2Waymo']\n"
  },
  {
    "path": "mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nr\"\"\"Adapted from `Waymo to KITTI converter\n    <https://github.com/caizhongang/waymo_kitti_converter>`_.\n\"\"\"\n\ntry:\n    from waymo_open_dataset import dataset_pb2 as open_dataset\nexcept ImportError:\n    raise ImportError(\n        'Please run \"pip install waymo-open-dataset-tf-2-1-0==1.2.0\" '\n        'to install the official devkit first.')\n\nfrom glob import glob\nfrom os.path import join\n\nimport mmcv\nimport numpy as np\nimport tensorflow as tf\nfrom waymo_open_dataset import label_pb2\nfrom waymo_open_dataset.protos import metrics_pb2\n\n\nclass KITTI2Waymo(object):\n    \"\"\"KITTI predictions to Waymo converter.\n\n    This class serves as the converter to change predictions from KITTI to\n    Waymo format.\n\n    Args:\n        kitti_result_files (list[dict]): Predictions in KITTI format.\n        waymo_tfrecords_dir (str): Directory to load waymo raw data.\n        waymo_results_save_dir (str): Directory to save converted predictions\n            in waymo format (.bin files).\n        waymo_results_final_path (str): Path to save combined\n            predictions in waymo format (.bin file), like 'a/b/c.bin'.\n        prefix (str): Prefix of filename. In general, 0 for training, 1 for\n            validation and 2 for testing.\n        workers (str): Number of parallel processes.\n    \"\"\"\n\n    def __init__(self,\n                 kitti_result_files,\n                 waymo_tfrecords_dir,\n                 waymo_results_save_dir,\n                 waymo_results_final_path,\n                 prefix,\n                 workers=64):\n\n        self.kitti_result_files = kitti_result_files\n        self.waymo_tfrecords_dir = waymo_tfrecords_dir\n        self.waymo_results_save_dir = waymo_results_save_dir\n        self.waymo_results_final_path = waymo_results_final_path\n        self.prefix = prefix\n        self.workers = int(workers)\n        self.name2idx = {}\n        for idx, result in enumerate(kitti_result_files):\n            if len(result['sample_idx']) > 0:\n                self.name2idx[str(result['sample_idx'][0])] = idx\n\n        # turn on eager execution for older tensorflow versions\n        if int(tf.__version__.split('.')[0]) < 2:\n            tf.enable_eager_execution()\n\n        self.k2w_cls_map = {\n            'Car': label_pb2.Label.TYPE_VEHICLE,\n            'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,\n            'Sign': label_pb2.Label.TYPE_SIGN,\n            'Cyclist': label_pb2.Label.TYPE_CYCLIST,\n        }\n\n        self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],\n                                            [-1.0, 0.0, 0.0, 0.0],\n                                            [0.0, -1.0, 0.0, 0.0],\n                                            [0.0, 0.0, 0.0, 1.0]])\n\n        self.get_file_names()\n        self.create_folder()\n\n    def get_file_names(self):\n        \"\"\"Get file names of waymo raw data.\"\"\"\n        self.waymo_tfrecord_pathnames = sorted(\n            glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))\n        print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')\n\n    def create_folder(self):\n        \"\"\"Create folder for data conversion.\"\"\"\n        mmcv.mkdir_or_exist(self.waymo_results_save_dir)\n\n    def parse_objects(self, kitti_result, T_k2w, context_name,\n                      frame_timestamp_micros):\n        \"\"\"Parse one prediction with several instances in kitti format and\n        convert them to `Object` 
proto.\n\n        Args:\n            kitti_result (dict): Predictions in kitti format.\n\n                - name (np.ndarray): Class labels of predictions.\n                - dimensions (np.ndarray): Height, width, length of boxes.\n                - location (np.ndarray): Bottom center of boxes (x, y, z).\n                - rotation_y (np.ndarray): Orientation of boxes.\n                - score (np.ndarray): Scores of predictions.\n            T_k2w (np.ndarray): Transformation matrix from kitti to waymo.\n            context_name (str): Context name of the frame.\n            frame_timestamp_micros (int): Frame timestamp.\n\n        Returns:\n            :obj:`Object`: Predictions in waymo dataset Object proto.\n        \"\"\"\n\n        def parse_one_object(instance_idx):\n            \"\"\"Parse one instance in kitti format and convert them to `Object`\n            proto.\n\n            Args:\n                instance_idx (int): Index of the instance to be converted.\n\n            Returns:\n                :obj:`Object`: Predicted instance in waymo dataset\n                    Object proto.\n            \"\"\"\n            cls = kitti_result['name'][instance_idx]\n            length = round(kitti_result['dimensions'][instance_idx, 0], 4)\n            height = round(kitti_result['dimensions'][instance_idx, 1], 4)\n            width = round(kitti_result['dimensions'][instance_idx, 2], 4)\n            x = round(kitti_result['location'][instance_idx, 0], 4)\n            y = round(kitti_result['location'][instance_idx, 1], 4)\n            z = round(kitti_result['location'][instance_idx, 2], 4)\n            rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)\n            score = round(kitti_result['score'][instance_idx], 4)\n\n            # y: downwards; move box origin from bottom center (kitti) to\n            # true center (waymo)\n            y -= height / 2\n            # frame transformation: kitti -> waymo\n            x, y, z = self.transform(T_k2w, x, y, z)\n\n            # different conventions\n            heading = -(rotation_y + np.pi / 2)\n            while heading < -np.pi:\n                heading += 2 * np.pi\n            while heading > np.pi:\n                heading -= 2 * np.pi\n\n            box = label_pb2.Label.Box()\n            box.center_x = x\n            box.center_y = y\n            box.center_z = z\n            box.length = length\n            box.width = width\n            box.height = height\n            box.heading = heading\n\n            o = metrics_pb2.Object()\n            o.object.box.CopyFrom(box)\n            o.object.type = self.k2w_cls_map[cls]\n            o.score = score\n\n            o.context_name = context_name\n            o.frame_timestamp_micros = frame_timestamp_micros\n\n            return o\n\n        objects = metrics_pb2.Objects()\n\n        for instance_idx in range(len(kitti_result['name'])):\n            o = parse_one_object(instance_idx)\n            objects.objects.append(o)\n\n        return objects\n\n    def convert_one(self, file_idx):\n        \"\"\"Convert action for single file.\n\n        Args:\n            file_idx (int): Index of the file to be converted.\n        \"\"\"\n        file_pathname = self.waymo_tfrecord_pathnames[file_idx]\n        file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')\n\n        for frame_num, frame_data in enumerate(file_data):\n            frame = open_dataset.Frame()\n            frame.ParseFromString(bytearray(frame_data.numpy()))\n\n            filename = 
f'{self.prefix}{file_idx:03d}{frame_num:03d}'\n\n            for camera in frame.context.camera_calibrations:\n                # FRONT = 1, see dataset.proto for details\n                if camera.name == 1:\n                    T_front_cam_to_vehicle = np.array(\n                        camera.extrinsic.transform).reshape(4, 4)\n\n            T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam\n\n            context_name = frame.context.name\n            frame_timestamp_micros = frame.timestamp_micros\n\n            if filename in self.name2idx:\n                kitti_result = \\\n                    self.kitti_result_files[self.name2idx[filename]]\n                objects = self.parse_objects(kitti_result, T_k2w, context_name,\n                                             frame_timestamp_micros)\n            else:\n                print(filename, 'not found.')\n                objects = metrics_pb2.Objects()\n\n            with open(\n                    join(self.waymo_results_save_dir, f'{filename}.bin'),\n                    'wb') as f:\n                f.write(objects.SerializeToString())\n\n    def convert(self):\n        \"\"\"Convert action.\"\"\"\n        print('Start converting ...')\n        mmcv.track_parallel_progress(self.convert_one, range(len(self)),\n                                     self.workers)\n        print('\\nFinished ...')\n\n        # combine all files into one .bin\n        pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))\n        combined = self.combine(pathnames)\n\n        with open(self.waymo_results_final_path, 'wb') as f:\n            f.write(combined.SerializeToString())\n\n    def __len__(self):\n        \"\"\"Length of the filename list.\"\"\"\n        return len(self.waymo_tfrecord_pathnames)\n\n    def transform(self, T, x, y, z):\n        \"\"\"Transform the coordinates with matrix T.\n\n        Args:\n            T (np.ndarray): Transformation matrix.\n            x(float): Coordinate in x axis.\n            y(float): Coordinate in y axis.\n            z(float): Coordinate in z axis.\n\n        Returns:\n            list: Coordinates after transformation.\n        \"\"\"\n        pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)\n        pt_aft = np.matmul(T, pt_bef)\n        return pt_aft[:3].flatten().tolist()\n\n    def combine(self, pathnames):\n        \"\"\"Combine predictions in waymo format for each sample together.\n\n        Args:\n            pathnames (str): Paths to save predictions.\n\n        Returns:\n            :obj:`Objects`: Combined predictions in Objects proto.\n        \"\"\"\n        combined = metrics_pb2.Objects()\n\n        for pathname in pathnames:\n            objects = metrics_pb2.Objects()\n            with open(pathname, 'rb') as f:\n                objects.ParseFromString(f.read())\n            for o in objects.objects:\n                combined.objects.append(o)\n\n        return combined\n"
  },
  {
    "path": "mmdet3d/core/hook/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .ema import MEGVIIEMAHook\nfrom .utils import is_parallel\nfrom .sequentialsontrol import SequentialControlHook\n__all__ = ['MEGVIIEMAHook', 'is_parallel', 'SequentialControlHook']\n"
  },
  {
    "path": "mmdet3d/core/hook/ema.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# modified from megvii-bevdepth.\nimport math\nimport os\nfrom copy import deepcopy\n\nimport torch\nfrom mmcv.runner import load_state_dict\nfrom mmcv.runner.dist_utils import master_only\nfrom mmcv.runner.hooks import HOOKS, Hook\n\nfrom mmdet3d.core.hook.utils import is_parallel\n\n__all__ = ['ModelEMA']\n\n\nclass ModelEMA:\n    \"\"\"Model Exponential Moving Average from https://github.com/rwightman/\n    pytorch-image-models Keep a moving average of everything in the model\n    state_dict (parameters and buffers).\n\n    This is intended to allow functionality like\n    https://www.tensorflow.org/api_docs/python/tf/train/\n    ExponentialMovingAverage\n    A smoothed version of the weights is necessary for some training\n    schemes to perform well.\n    This class is sensitive where it is initialized in the sequence\n    of model init, GPU assignment and distributed training wrappers.\n    \"\"\"\n\n    def __init__(self, model, decay=0.9999, updates=0):\n        \"\"\"\n        Args:\n            model (nn.Module): model to apply EMA.\n            decay (float): ema decay reate.\n            updates (int): counter of EMA updates.\n        \"\"\"\n        # Create EMA(FP32)\n        self.ema_model = deepcopy(model).eval()\n        self.ema = self.ema_model.module.module if is_parallel(\n            self.ema_model.module) else self.ema_model.module\n        self.updates = updates\n        # decay exponential ramp (to help early epochs)\n        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))\n        for p in self.ema.parameters():\n            p.requires_grad_(False)\n\n    def update(self, trainer, model):\n        # Update EMA parameters\n        with torch.no_grad():\n            self.updates += 1\n            d = self.decay(self.updates)\n\n            msd = model.module.state_dict() if is_parallel(\n                model) else model.state_dict()  # model state_dict\n            for k, v in self.ema.state_dict().items():\n                if v.dtype.is_floating_point:\n                    v *= d\n                    v += (1.0 - d) * msd[k].detach()\n\n\n@HOOKS.register_module()\nclass MEGVIIEMAHook(Hook):\n    \"\"\"EMAHook used in BEVDepth.\n\n    Modified from https://github.com/Megvii-Base\n    Detection/BEVDepth/blob/main/callbacks/ema.py.\n    \"\"\"\n\n    def __init__(self, init_updates=0, decay=0.9990, resume=None, interval=-1):\n        super().__init__()\n        self.init_updates = init_updates\n        self.resume = resume\n        self.decay = decay\n        self.interval = interval\n\n    def before_run(self, runner):\n        from torch.nn.modules.batchnorm import SyncBatchNorm\n\n        bn_model_list = list()\n        bn_model_dist_group_list = list()\n        for model_ref in runner.model.modules():\n            if isinstance(model_ref, SyncBatchNorm):\n                bn_model_list.append(model_ref)\n                bn_model_dist_group_list.append(model_ref.process_group)\n                model_ref.process_group = None\n        runner.ema_model = ModelEMA(runner.model, self.decay)\n\n        for bn_model, dist_group in zip(bn_model_list,\n                                        bn_model_dist_group_list):\n            bn_model.process_group = dist_group\n        runner.ema_model.updates = self.init_updates\n\n        if self.resume is not None:\n            runner.logger.info(f'resume ema checkpoint from {self.resume}')\n            cpt = torch.load(self.resume, map_location='cpu')\n       
     load_state_dict(runner.ema_model.ema, cpt['state_dict'])\n            runner.ema_model.updates = cpt['updates']\n\n    def after_train_iter(self, runner):\n        runner.ema_model.update(runner, runner.model.module)\n        curr_step = runner.iter\n        if self.interval>0:\n            if curr_step % self.interval==0 and curr_step>0:\n                self.save_checkpoint_iter(runner)\n            \n\n    def after_train_epoch(self, runner):\n        self.save_checkpoint(runner)\n\n    def after_run(self, runner):\n        self.save_checkpoint_iter(runner)\n\n    @master_only\n    def save_checkpoint(self, runner):\n        state_dict = runner.ema_model.ema.state_dict()\n        ema_checkpoint = {\n            'epoch': runner.epoch,\n            'state_dict': state_dict,\n            'updates': runner.ema_model.updates\n        }\n        save_path = f'epoch_{runner.epoch+1}_ema.pth'\n        save_path = os.path.join(runner.work_dir, save_path)\n        torch.save(ema_checkpoint, save_path)\n        runner.logger.info(f'Saving ema checkpoint at {save_path}')\n    \n    @master_only\n    def save_checkpoint_iter(self, runner):\n        state_dict = runner.ema_model.ema.state_dict()\n        ema_checkpoint = {\n            'iter': runner.iter,\n            'state_dict': state_dict,\n            'updates': runner.ema_model.updates\n        }\n        save_path = f'iter_{runner.iter}_ema.pth'\n        save_path = os.path.join(runner.work_dir, save_path)\n        torch.save(ema_checkpoint, save_path)\n        runner.logger.info(f'Saving ema checkpoint at {save_path}')"
  },
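Since the hook is registered in `HOOKS`, it is normally enabled from a config through the standard mmcv `custom_hooks` list; a sketch with placeholder values, where the keys mirror `MEGVIIEMAHook.__init__` above:

```python
# In an mmdet3d config file.
custom_hooks = [
    dict(
        type='MEGVIIEMAHook',
        init_updates=0,   # placeholder; counter the EMA decay ramp starts from
        decay=0.9990,
        interval=-1,      # >0 additionally saves an EMA checkpoint every `interval` iters
        priority='NORMAL',
    )
]
```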
  {
    "path": "mmdet3d/core/hook/forge_load.py",
    "content": "# -*- coding: utf-8 -*-\n#!/usr/bin/python                        \n##################################################\n# AUTHOR : Yandi LI\n# CREATED_AT : 2018-11-01\n# LAST_MODIFIED : 2018-11-07 12:55:32\n# USAGE : python -u main.py\n##################################################\nfrom __future__ import division\nimport math\nimport threading\nimport time\nfrom collections import deque\n\nfrom numba import cuda\nimport numpy\nfrom mmcv.runner.hooks import HOOKS, Hook\nimport os\nlocal_rank = int(os.environ.get('LOCAL_RANK', 0))\ncuda.select_device(local_rank)\n\nclass Monitor(threading.Thread):\n  def __init__(self):\n    super(Monitor, self).__init__()\n    self.setDaemon(True)\n    self._queue = deque([0] * 5, 5)\n    self.avg_load = 0\n    self.max_load = 0\n\n  def update(self, ):\n    load = self.get_current_load()\n    self._queue.append(load)\n    self.avg_load = sum(self._queue)/len(self._queue)\n    self.max_load = max(self._queue)\n\n  def run(self):\n    while True:\n      self.update()\n      time.sleep(1)\n\n  @staticmethod\n  def get_current_load():\n    import GPUtil\n    gpu = GPUtil.getGPUs()[local_rank]\n    load = gpu.load * 100\n    return load\n\n@HOOKS.register_module()\nclass ForgeLoadWorker(Hook):\n\n  def __init__(self, target=50):\n    super().__init__()\n    if os.path.isfile('/workspace/unlock'):\n        try:\n            os.remove('/workspace/unlock')\n        except:\n            pass\n\n  def after_run(self, runner):\n    import os\n    target = float(os.environ.get(\"TARGET\", 80))\n    data = numpy.zeros(512)\n    self._device_data = cuda.to_device(data)\n    self.threadsperblock = 128\n    self.blockspergrid = int(math.ceil(data.shape[0] / self.threadsperblock)) \n    self.target = target\n    self.multiplier = 1000\n   \n    self.main(target)\n    pass\n\n  def __str__(self):\n    return \"threadsperblock: {}, blockspergrid: {}\".format(self.threadsperblock, self.blockspergrid)\n\n\n  @staticmethod\n  @cuda.jit\n  def my_kernel(io_array):\n    \"\"\" CUDA kernel \n    \"\"\"\n    pos = cuda.grid(1)\n    tx = cuda.threadIdx.x \n    if pos < io_array.size:\n      io_array[pos] += tx # do the computation\n\n\n  def run_awhile(self, sec=10):\n    start = time.time()\n    while time.time() - start < sec:\n      self.my_kernel[int(self.multiplier * self.blockspergrid), self.threadsperblock](self._device_data)\n\n\n  def idle_awhile(self, sec=5):\n    time.sleep(sec)\n   \n\n  def _boost(self, rate=1.2):\n    self.multiplier *= rate\n\n\n  def _slow_down(self, rate=1.5):\n    self.multiplier /= rate\n    \n\n  def adjust_speed(self, avg_load):\n    if avg_load < self.target * 0.9:\n      self._boost()\n      # print(\"Adjusted speed: boost\")\n      return \n    if avg_load > self.target * 1.2:\n      self._slow_down()\n      # print(\"Adjusted speed: slow_down\")\n      return \n\n\n  # classmethod\n  def main(self, target=50):\n    monitor = Monitor()\n    monitor.start()\n    # print(\"Monitor started: %s\" % monitor.is_alive())\n    time.sleep(5)\n    # print(\"Initial average load\", monitor.avg_load)\n\n    while True:\n      try:\n        if os.path.isfile('/workspace/unlock'):\n          break\n        if monitor.max_load > self.target * 1.1:\n          # print(\"Idle for 5s with load %s\" % monitor.max_load)\n          self.idle_awhile(5)\n          continue\n        # print(\"Run for 10s with load %s and multiplier %s\" % (monitor.avg_load, self.multiplier))\n        self.run_awhile(10)\n        
self.adjust_speed(monitor.avg_load)\n      except:\n        pass\n\n\n# if __name__ == \"__main__\":\n#   import os\n#   target = float(os.environ.get(\"TARGET\", 80))\n#   Worker.main(target)\n"
  },
  {
    "path": "mmdet3d/core/hook/sequentialsontrol.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.runner.hooks import HOOKS, Hook\nfrom mmdet3d.core.hook.utils import is_parallel\n\n__all__ = ['SequentialControlHook']\n\n\n@HOOKS.register_module()\nclass SequentialControlHook(Hook):\n    \"\"\" \"\"\"\n\n    def __init__(self, temporal_start_epoch=1, temporal_start_iter=-1):\n        super().__init__()\n        self.temporal_start_epoch=temporal_start_epoch\n        self.temporal_start_iter = temporal_start_iter\n\n    def set_temporal_flag(self, runner, flag):\n        if is_parallel(runner.model.module):\n            runner.model.module.module.with_prev=flag\n        else:\n            runner.model.module.with_prev = flag\n\n    def set_temporal_flag_v2(self, runner, flag):\n        if is_parallel(runner.model.module):\n            runner.model.module.module.do_history=flag\n        else:\n            runner.model.module.do_history = flag\n\n    def before_run(self, runner):\n        self.set_temporal_flag(runner, False)\n        if self.temporal_start_iter>0:\n            self.set_temporal_flag_v2(runner, False)\n\n    def before_train_epoch(self, runner):\n        if runner.epoch > self.temporal_start_epoch and self.temporal_start_iter<0:\n            self.set_temporal_flag(runner, True)\n\n    def after_train_iter(self, runner):\n \n        curr_step = runner.iter\n        if curr_step >= self.temporal_start_iter and self.temporal_start_iter>=0:\n            self.set_temporal_flag_v2(runner, True)\n\n"
  },
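Likewise, `SequentialControlHook` is typically switched on from the config; a sketch with placeholder values:

```python
custom_hooks = [
    # epoch-based: enable `with_prev` once runner.epoch exceeds temporal_start_epoch
    dict(type='SequentialControlHook', temporal_start_epoch=2),
    # or iteration-based: enable `do_history` from a given iteration instead
    # dict(type='SequentialControlHook', temporal_start_iter=500),
]
```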
  {
    "path": "mmdet3d/core/hook/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom torch import nn\n\n__all__ = ['is_parallel']\n\n\ndef is_parallel(model):\n    \"\"\"check if model is in parallel mode.\"\"\"\n    parallel_type = (\n        nn.parallel.DataParallel,\n        nn.parallel.DistributedDataParallel,\n    )\n    return isinstance(model, parallel_type)\n"
  },
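`is_parallel` is a plain isinstance check against the two torch parallel wrappers; a quick sketch:

```python
import torch.nn as nn

from mmdet3d.core.hook.utils import is_parallel

model = nn.Linear(4, 2)
print(is_parallel(model))                   # False
print(is_parallel(nn.DataParallel(model)))  # True
```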
  {
    "path": "mmdet3d/core/points/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_points import BasePoints\nfrom .cam_points import CameraPoints\nfrom .depth_points import DepthPoints\nfrom .lidar_points import LiDARPoints\n\n__all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']\n\n\ndef get_points_type(points_type):\n    \"\"\"Get the class of points according to coordinate type.\n\n    Args:\n        points_type (str): The type of points coordinate.\n            The valid value are \"CAMERA\", \"LIDAR\", or \"DEPTH\".\n\n    Returns:\n        class: Points type.\n    \"\"\"\n    if points_type == 'CAMERA':\n        points_cls = CameraPoints\n    elif points_type == 'LIDAR':\n        points_cls = LiDARPoints\n    elif points_type == 'DEPTH':\n        points_cls = DepthPoints\n    else:\n        raise ValueError('Only \"points_type\" of \"CAMERA\", \"LIDAR\", or \"DEPTH\"'\n                         f' are supported, got {points_type}')\n\n    return points_cls\n"
  },
  {
    "path": "mmdet3d/core/points/base_points.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom abc import abstractmethod\n\nimport numpy as np\nimport torch\n\nfrom ..bbox.structures.utils import rotation_3d_in_axis\n\n\nclass BasePoints(object):\n    \"\"\"Base class for Points.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int, optional): Number of the dimension of a point.\n            Each row is (x, y, z). Defaults to 3.\n        attribute_dims (dict, optional): Dictionary to indicate the\n            meaning of extra dimension. Defaults to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Defaults to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        if isinstance(tensor, torch.Tensor):\n            device = tensor.device\n        else:\n            device = torch.device('cpu')\n        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)\n        if tensor.numel() == 0:\n            # Use reshape, so we don't end up creating a new tensor that\n            # does not depend on the inputs (and consequently confuses jit)\n            tensor = tensor.reshape((0, points_dim)).to(\n                dtype=torch.float32, device=device)\n        assert tensor.dim() == 2 and tensor.size(-1) == \\\n            points_dim, tensor.size()\n\n        self.tensor = tensor\n        self.points_dim = points_dim\n        self.attribute_dims = attribute_dims\n        self.rotation_axis = 0\n\n    @property\n    def coord(self):\n        \"\"\"torch.Tensor: Coordinates of each point in shape (N, 3).\"\"\"\n        return self.tensor[:, :3]\n\n    @coord.setter\n    def coord(self, tensor):\n        \"\"\"Set the coordinates of each point.\"\"\"\n        try:\n            tensor = tensor.reshape(self.shape[0], 3)\n        except (RuntimeError, ValueError):  # for torch.Tensor and np.ndarray\n            raise ValueError(f'got unexpected shape {tensor.shape}')\n        if not isinstance(tensor, torch.Tensor):\n            tensor = self.tensor.new_tensor(tensor)\n        self.tensor[:, :3] = tensor\n\n    @property\n    def height(self):\n        \"\"\"torch.Tensor:\n            A vector with height of each point in shape (N, 1), or None.\"\"\"\n        if self.attribute_dims is not None and \\\n                'height' in self.attribute_dims.keys():\n            return self.tensor[:, self.attribute_dims['height']]\n        else:\n            return None\n\n    @height.setter\n    def height(self, tensor):\n        \"\"\"Set the height of each point.\"\"\"\n        try:\n            tensor = tensor.reshape(self.shape[0])\n        except (RuntimeError, ValueError):  # for torch.Tensor and np.ndarray\n            raise ValueError(f'got unexpected shape {tensor.shape}')\n        if not isinstance(tensor, torch.Tensor):\n            tensor = self.tensor.new_tensor(tensor)\n        if self.attribute_dims is not None and \\\n                'height' in self.attribute_dims.keys():\n            self.tensor[:, self.attribute_dims['height']] = tensor\n        else:\n            # add height attribute\n            if self.attribute_dims is None:\n                self.attribute_dims 
= dict()\n            attr_dim = self.shape[1]\n            self.tensor = torch.cat([self.tensor, tensor.unsqueeze(1)], dim=1)\n            self.attribute_dims.update(dict(height=attr_dim))\n            self.points_dim += 1\n\n    @property\n    def color(self):\n        \"\"\"torch.Tensor:\n            A vector with color of each point in shape (N, 3), or None.\"\"\"\n        if self.attribute_dims is not None and \\\n                'color' in self.attribute_dims.keys():\n            return self.tensor[:, self.attribute_dims['color']]\n        else:\n            return None\n\n    @color.setter\n    def color(self, tensor):\n        \"\"\"Set the color of each point.\"\"\"\n        try:\n            tensor = tensor.reshape(self.shape[0], 3)\n        except (RuntimeError, ValueError):  # for torch.Tensor and np.ndarray\n            raise ValueError(f'got unexpected shape {tensor.shape}')\n        if tensor.max() >= 256 or tensor.min() < 0:\n            warnings.warn('point got color value beyond [0, 255]')\n        if not isinstance(tensor, torch.Tensor):\n            tensor = self.tensor.new_tensor(tensor)\n        if self.attribute_dims is not None and \\\n                'color' in self.attribute_dims.keys():\n            self.tensor[:, self.attribute_dims['color']] = tensor\n        else:\n            # add color attribute\n            if self.attribute_dims is None:\n                self.attribute_dims = dict()\n            attr_dim = self.shape[1]\n            self.tensor = torch.cat([self.tensor, tensor], dim=1)\n            self.attribute_dims.update(\n                dict(color=[attr_dim, attr_dim + 1, attr_dim + 2]))\n            self.points_dim += 3\n\n    @property\n    def shape(self):\n        \"\"\"torch.Shape: Shape of points.\"\"\"\n        return self.tensor.shape\n\n    def shuffle(self):\n        \"\"\"Shuffle the points.\n\n        Returns:\n            torch.Tensor: The shuffled index.\n        \"\"\"\n        idx = torch.randperm(self.__len__(), device=self.tensor.device)\n        self.tensor = self.tensor[idx]\n        return idx\n\n    def rotate(self, rotation, axis=None):\n        \"\"\"Rotate points with the given rotation matrix or angle.\n\n        Args:\n            rotation (float | np.ndarray | torch.Tensor): Rotation matrix\n                or angle.\n            axis (int, optional): Axis to rotate at. 
Defaults to None.\n        \"\"\"\n        if not isinstance(rotation, torch.Tensor):\n            rotation = self.tensor.new_tensor(rotation)\n        assert rotation.shape == torch.Size([3, 3]) or \\\n            rotation.numel() == 1, f'invalid rotation shape {rotation.shape}'\n\n        if axis is None:\n            axis = self.rotation_axis\n\n        if rotation.numel() == 1:\n            rotated_points, rot_mat_T = rotation_3d_in_axis(\n                self.tensor[:, :3][None], rotation, axis=axis, return_mat=True)\n            self.tensor[:, :3] = rotated_points.squeeze(0)\n            rot_mat_T = rot_mat_T.squeeze(0)\n        else:\n            # rotation.numel() == 9\n            self.tensor[:, :3] = self.tensor[:, :3] @ rotation\n            rot_mat_T = rotation\n\n        return rot_mat_T\n\n    @abstractmethod\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the points along given BEV direction.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n        \"\"\"\n        pass\n\n    def translate(self, trans_vector):\n        \"\"\"Translate points with the given translation vector.\n\n        Args:\n            trans_vector (np.ndarray, torch.Tensor): Translation\n                vector of size 3 or nx3.\n        \"\"\"\n        if not isinstance(trans_vector, torch.Tensor):\n            trans_vector = self.tensor.new_tensor(trans_vector)\n        trans_vector = trans_vector.squeeze(0)\n        if trans_vector.dim() == 1:\n            assert trans_vector.shape[0] == 3\n        elif trans_vector.dim() == 2:\n            assert trans_vector.shape[0] == self.tensor.shape[0] and \\\n                trans_vector.shape[1] == 3\n        else:\n            raise NotImplementedError(\n                f'Unsupported translation vector of shape {trans_vector.shape}'\n            )\n        self.tensor[:, :3] += trans_vector\n\n    def in_range_3d(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                (x_min, y_min, z_min, x_max, y_max, z_max)\n\n        Note:\n            In the original implementation of SECOND, checking whether\n            a box in the range checks whether the points are in a convex\n            polygon, we try to reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: A binary vector indicating whether each point is\n                inside the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > point_range[0])\n                          & (self.tensor[:, 1] > point_range[1])\n                          & (self.tensor[:, 2] > point_range[2])\n                          & (self.tensor[:, 0] < point_range[3])\n                          & (self.tensor[:, 1] < point_range[4])\n                          & (self.tensor[:, 2] < point_range[5]))\n        return in_range_flags\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: BEV of the points in shape (N, 2).\"\"\"\n        return self.tensor[:, [0, 1]]\n\n    def in_range_bev(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                in order of (x_min, y_min, x_max, y_max).\n\n        Returns:\n            torch.Tensor: Indicating whether each point is inside\n                the reference range.\n        \"\"\"\n        in_range_flags = 
((self.bev[:, 0] > point_range[0])\n                          & (self.bev[:, 1] > point_range[1])\n                          & (self.bev[:, 0] < point_range[2])\n                          & (self.bev[:, 1] < point_range[3]))\n        return in_range_flags\n\n    @abstractmethod\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted box of the same type\n                in the `dst` mode.\n        \"\"\"\n        pass\n\n    def scale(self, scale_factor):\n        \"\"\"Scale the points with horizontal and vertical scaling factors.\n\n        Args:\n            scale_factors (float): Scale factors to scale the points.\n        \"\"\"\n        self.tensor[:, :3] *= scale_factor\n\n    def __getitem__(self, item):\n        \"\"\"\n        Note:\n            The following usage are allowed:\n            1. `new_points = points[3]`:\n                return a `Points` that contains only one point.\n            2. `new_points = points[2:10]`:\n                return a slice of points.\n            3. `new_points = points[vector]`:\n                where vector is a torch.BoolTensor with `length = len(points)`.\n                Nonzero elements in the vector will be selected.\n            4. `new_points = points[3:11, vector]`:\n                return a slice of points and attribute dims.\n            5. 
`new_points = points[4:12, 2]`:\n                return a slice of points with single attribute.\n            Note that the returned Points might share storage with this Points,\n            subject to Pytorch's indexing semantics.\n\n        Returns:\n            :obj:`BasePoints`: A new object of\n                :class:`BasePoints` after indexing.\n        \"\"\"\n        original_type = type(self)\n        if isinstance(item, int):\n            return original_type(\n                self.tensor[item].view(1, -1),\n                points_dim=self.points_dim,\n                attribute_dims=self.attribute_dims)\n        elif isinstance(item, tuple) and len(item) == 2:\n            if isinstance(item[1], slice):\n                start = 0 if item[1].start is None else item[1].start\n                stop = self.tensor.shape[1] if \\\n                    item[1].stop is None else item[1].stop\n                step = 1 if item[1].step is None else item[1].step\n                item = list(item)\n                item[1] = list(range(start, stop, step))\n                item = tuple(item)\n            elif isinstance(item[1], int):\n                item = list(item)\n                item[1] = [item[1]]\n                item = tuple(item)\n            p = self.tensor[item[0], item[1]]\n\n            keep_dims = list(\n                set(item[1]).intersection(set(range(3, self.tensor.shape[1]))))\n            if self.attribute_dims is not None:\n                attribute_dims = self.attribute_dims.copy()\n                for key in self.attribute_dims.keys():\n                    cur_attribute_dims = attribute_dims[key]\n                    if isinstance(cur_attribute_dims, int):\n                        cur_attribute_dims = [cur_attribute_dims]\n                    intersect_attr = list(\n                        set(cur_attribute_dims).intersection(set(keep_dims)))\n                    if len(intersect_attr) == 1:\n                        attribute_dims[key] = intersect_attr[0]\n                    elif len(intersect_attr) > 1:\n                        attribute_dims[key] = intersect_attr\n                    else:\n                        attribute_dims.pop(key)\n            else:\n                attribute_dims = None\n        elif isinstance(item, (slice, np.ndarray, torch.Tensor)):\n            p = self.tensor[item]\n            attribute_dims = self.attribute_dims\n        else:\n            raise NotImplementedError(f'Invalid slice {item}!')\n\n        assert p.dim() == 2, \\\n            f'Indexing on Points with {item} failed to return a matrix!'\n        return original_type(\n            p, points_dim=p.shape[1], attribute_dims=attribute_dims)\n\n    def __len__(self):\n        \"\"\"int: Number of points in the current object.\"\"\"\n        return self.tensor.shape[0]\n\n    def __repr__(self):\n        \"\"\"str: Return a strings that describes the object.\"\"\"\n        return self.__class__.__name__ + '(\\n    ' + str(self.tensor) + ')'\n\n    @classmethod\n    def cat(cls, points_list):\n        \"\"\"Concatenate a list of Points into a single Points.\n\n        Args:\n            points_list (list[:obj:`BasePoints`]): List of points.\n\n        Returns:\n            :obj:`BasePoints`: The concatenated Points.\n        \"\"\"\n        assert isinstance(points_list, (list, tuple))\n        if len(points_list) == 0:\n            return cls(torch.empty(0))\n        assert all(isinstance(points, cls) for points in points_list)\n\n        # use torch.cat (v.s. 
layers.cat)\n        # so the returned points never share storage with input\n        cat_points = cls(\n            torch.cat([p.tensor for p in points_list], dim=0),\n            points_dim=points_list[0].tensor.shape[1],\n            attribute_dims=points_list[0].attribute_dims)\n        return cat_points\n\n    def to(self, device):\n        \"\"\"Convert current points to a specific device.\n\n        Args:\n            device (str | :obj:`torch.device`): The name of the device.\n\n        Returns:\n            :obj:`BasePoints`: A new boxes object on the\n                specific device.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.to(device),\n            points_dim=self.points_dim,\n            attribute_dims=self.attribute_dims)\n\n    def clone(self):\n        \"\"\"Clone the Points.\n\n        Returns:\n            :obj:`BasePoints`: Box object with the same properties\n                as self.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.clone(),\n            points_dim=self.points_dim,\n            attribute_dims=self.attribute_dims)\n\n    @property\n    def device(self):\n        \"\"\"str: The device of the points are on.\"\"\"\n        return self.tensor.device\n\n    def __iter__(self):\n        \"\"\"Yield a point as a Tensor of shape (4,) at a time.\n\n        Returns:\n            torch.Tensor: A point of shape (4,).\n        \"\"\"\n        yield from self.tensor\n\n    def new_point(self, data):\n        \"\"\"Create a new point object with data.\n\n        The new point and its tensor has the similar properties\n            as self and self.tensor, respectively.\n\n        Args:\n            data (torch.Tensor | numpy.array | list): Data to be copied.\n\n        Returns:\n            :obj:`BasePoints`: A new point object with ``data``,\n                the object's other properties are similar to ``self``.\n        \"\"\"\n        new_tensor = self.tensor.new_tensor(data) \\\n            if not isinstance(data, torch.Tensor) else data.to(self.device)\n        original_type = type(self)\n        return original_type(\n            new_tensor,\n            points_dim=self.points_dim,\n            attribute_dims=self.attribute_dims)\n"
  },
  {
    "path": "mmdet3d/core/points/cam_points.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_points import BasePoints\n\n\nclass CameraPoints(BasePoints):\n    \"\"\"Points of instances in CAM coordinates.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int, optional): Number of the dimension of a point.\n            Each row is (x, y, z). Defaults to 3.\n        attribute_dims (dict, optional): Dictionary to indicate the\n            meaning of extra dimension. Defaults to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Defaults to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        super(CameraPoints, self).__init__(\n            tensor, points_dim=points_dim, attribute_dims=attribute_dims)\n        self.rotation_axis = 1\n\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the points along given BEV direction.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n        \"\"\"\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0] = -self.tensor[:, 0]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 2] = -self.tensor[:, 2]\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: BEV of the points in shape (N, 2).\"\"\"\n        return self.tensor[:, [0, 2]]\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted point of the same type\n                in the `dst` mode.\n        \"\"\"\n        from mmdet3d.core.bbox import Coord3DMode\n        return Coord3DMode.convert_point(\n            point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)\n"
  },
  {
    "path": "mmdet3d/core/points/depth_points.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_points import BasePoints\n\n\nclass DepthPoints(BasePoints):\n    \"\"\"Points of instances in DEPTH coordinates.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int, optional): Number of the dimension of a point.\n            Each row is (x, y, z). Defaults to 3.\n        attribute_dims (dict, optional): Dictionary to indicate the\n            meaning of extra dimension. Defaults to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Defaults to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        super(DepthPoints, self).__init__(\n            tensor, points_dim=points_dim, attribute_dims=attribute_dims)\n        self.rotation_axis = 2\n\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the points along given BEV direction.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n        \"\"\"\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0] = -self.tensor[:, 0]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 1] = -self.tensor[:, 1]\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted point of the same type\n                in the `dst` mode.\n        \"\"\"\n        from mmdet3d.core.bbox import Coord3DMode\n        return Coord3DMode.convert_point(\n            point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)\n"
  },
  {
    "path": "mmdet3d/core/points/lidar_points.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_points import BasePoints\n\n\nclass LiDARPoints(BasePoints):\n    \"\"\"Points of instances in LIDAR coordinates.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int, optional): Number of the dimension of a point.\n            Each row is (x, y, z). Defaults to 3.\n        attribute_dims (dict, optional): Dictionary to indicate the\n            meaning of extra dimension. Defaults to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Defaults to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        super(LiDARPoints, self).__init__(\n            tensor, points_dim=points_dim, attribute_dims=attribute_dims)\n        self.rotation_axis = 2\n\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the points along given BEV direction.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n        \"\"\"\n        if bev_direction == 'horizontal':\n            self.tensor[:, 1] = -self.tensor[:, 1]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 0] = -self.tensor[:, 0]\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and\n                translation matrix between different coordinates.\n                Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted point of the same type\n                in the `dst` mode.\n        \"\"\"\n        from mmdet3d.core.bbox import Coord3DMode\n        return Coord3DMode.convert_point(\n            point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)\n"
  },
  {
    "path": "mmdet3d/core/post_processing/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,\n                                        merge_aug_proposals, merge_aug_scores,\n                                        multiclass_nms)\nfrom .box3d_nms import (aligned_3d_nms, box3d_multiclass_nms, circle_nms,\n                        nms_bev, nms_normal_bev)\nfrom .merge_augs import merge_aug_bboxes_3d\n\n__all__ = [\n    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',\n    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',\n    'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms', 'nms_bev',\n    'nms_normal_bev'\n]\n"
  },
  {
    "path": "mmdet3d/core/post_processing/box3d_nms.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numba\nimport numpy as np\nimport torch\nfrom mmcv.ops import nms, nms_rotated\n\n\ndef box3d_multiclass_nms(mlvl_bboxes,\n                         mlvl_bboxes_for_nms,\n                         mlvl_scores,\n                         score_thr,\n                         max_num,\n                         cfg,\n                         mlvl_dir_scores=None,\n                         mlvl_attr_scores=None,\n                         mlvl_bboxes2d=None):\n    \"\"\"Multi-class NMS for 3D boxes. The IoU used for NMS is defined as the 2D\n    IoU between BEV boxes.\n\n    Args:\n        mlvl_bboxes (torch.Tensor): Multi-level boxes with shape (N, M).\n            M is the dimensions of boxes.\n        mlvl_bboxes_for_nms (torch.Tensor): Multi-level boxes with shape\n            (N, 5) ([x1, y1, x2, y2, ry]). N is the number of boxes.\n            The coordinate system of the BEV boxes is counterclockwise.\n        mlvl_scores (torch.Tensor): Multi-level boxes with shape\n            (N, C + 1). N is the number of boxes. C is the number of classes.\n        score_thr (float): Score threshold to filter boxes with low\n            confidence.\n        max_num (int): Maximum number of boxes will be kept.\n        cfg (dict): Configuration dict of NMS.\n        mlvl_dir_scores (torch.Tensor, optional): Multi-level scores\n            of direction classifier. Defaults to None.\n        mlvl_attr_scores (torch.Tensor, optional): Multi-level scores\n            of attribute classifier. Defaults to None.\n        mlvl_bboxes2d (torch.Tensor, optional): Multi-level 2D bounding\n            boxes. Defaults to None.\n\n    Returns:\n        tuple[torch.Tensor]: Return results after nms, including 3D\n            bounding boxes, scores, labels, direction scores, attribute\n            scores (optional) and 2D bounding boxes (optional).\n    \"\"\"\n    # do multi class nms\n    # the fg class id range: [0, num_classes-1]\n    num_classes = mlvl_scores.shape[1] - 1\n    bboxes = []\n    scores = []\n    labels = []\n    dir_scores = []\n    attr_scores = []\n    bboxes2d = []\n    for i in range(0, num_classes):\n        # get bboxes and scores of this class\n        cls_inds = mlvl_scores[:, i] > score_thr\n        if not cls_inds.any():\n            continue\n\n        _scores = mlvl_scores[cls_inds, i]\n        _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]\n\n        if cfg.use_rotate_nms:\n            nms_func = nms_bev\n        else:\n            nms_func = nms_normal_bev\n\n        selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)\n        _mlvl_bboxes = mlvl_bboxes[cls_inds, :]\n        bboxes.append(_mlvl_bboxes[selected])\n        scores.append(_scores[selected])\n        cls_label = mlvl_bboxes.new_full((len(selected), ),\n                                         i,\n                                         dtype=torch.long)\n        labels.append(cls_label)\n\n        if mlvl_dir_scores is not None:\n            _mlvl_dir_scores = mlvl_dir_scores[cls_inds]\n            dir_scores.append(_mlvl_dir_scores[selected])\n        if mlvl_attr_scores is not None:\n            _mlvl_attr_scores = mlvl_attr_scores[cls_inds]\n            attr_scores.append(_mlvl_attr_scores[selected])\n        if mlvl_bboxes2d is not None:\n            _mlvl_bboxes2d = mlvl_bboxes2d[cls_inds]\n            bboxes2d.append(_mlvl_bboxes2d[selected])\n\n    if bboxes:\n        bboxes = torch.cat(bboxes, dim=0)\n        scores = torch.cat(scores, 
dim=0)\n        labels = torch.cat(labels, dim=0)\n        if mlvl_dir_scores is not None:\n            dir_scores = torch.cat(dir_scores, dim=0)\n        if mlvl_attr_scores is not None:\n            attr_scores = torch.cat(attr_scores, dim=0)\n        if mlvl_bboxes2d is not None:\n            bboxes2d = torch.cat(bboxes2d, dim=0)\n        if bboxes.shape[0] > max_num:\n            _, inds = scores.sort(descending=True)\n            inds = inds[:max_num]\n            bboxes = bboxes[inds, :]\n            labels = labels[inds]\n            scores = scores[inds]\n            if mlvl_dir_scores is not None:\n                dir_scores = dir_scores[inds]\n            if mlvl_attr_scores is not None:\n                attr_scores = attr_scores[inds]\n            if mlvl_bboxes2d is not None:\n                bboxes2d = bboxes2d[inds]\n    else:\n        bboxes = mlvl_scores.new_zeros((0, mlvl_bboxes.size(-1)))\n        scores = mlvl_scores.new_zeros((0, ))\n        labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)\n        if mlvl_dir_scores is not None:\n            dir_scores = mlvl_scores.new_zeros((0, ))\n        if mlvl_attr_scores is not None:\n            attr_scores = mlvl_scores.new_zeros((0, ))\n        if mlvl_bboxes2d is not None:\n            bboxes2d = mlvl_scores.new_zeros((0, 4))\n\n    results = (bboxes, scores, labels)\n\n    if mlvl_dir_scores is not None:\n        results = results + (dir_scores, )\n    if mlvl_attr_scores is not None:\n        results = results + (attr_scores, )\n    if mlvl_bboxes2d is not None:\n        results = results + (bboxes2d, )\n\n    return results\n\n\ndef aligned_3d_nms(boxes, scores, classes, thresh):\n    \"\"\"3D NMS for aligned boxes.\n\n    Args:\n        boxes (torch.Tensor): Aligned box with shape [n, 6].\n        scores (torch.Tensor): Scores of each box.\n        classes (torch.Tensor): Class of each box.\n        thresh (float): IoU threshold for nms.\n\n    Returns:\n        torch.Tensor: Indices of selected boxes.\n    \"\"\"\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    z1 = boxes[:, 2]\n    x2 = boxes[:, 3]\n    y2 = boxes[:, 4]\n    z2 = boxes[:, 5]\n    area = (x2 - x1) * (y2 - y1) * (z2 - z1)\n    zero = boxes.new_zeros(1, )\n\n    score_sorted = torch.argsort(scores)\n    pick = []\n    while (score_sorted.shape[0] != 0):\n        last = score_sorted.shape[0]\n        i = score_sorted[-1]\n        pick.append(i)\n\n        xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])\n        yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])\n        zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])\n        xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])\n        yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])\n        zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])\n        classes1 = classes[i]\n        classes2 = classes[score_sorted[:last - 1]]\n        inter_l = torch.max(zero, xx2 - xx1)\n        inter_w = torch.max(zero, yy2 - yy1)\n        inter_h = torch.max(zero, zz2 - zz1)\n\n        inter = inter_l * inter_w * inter_h\n        iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)\n        iou = iou * (classes1 == classes2).float()\n        score_sorted = score_sorted[torch.nonzero(\n            iou <= thresh, as_tuple=False).flatten()]\n\n    indices = boxes.new_tensor(pick, dtype=torch.long)\n    return indices\n\n\n@numba.jit(nopython=True)\ndef circle_nms(dets, thresh, post_max_size=83):\n    \"\"\"Circular NMS.\n\n    An object is only counted as positive if no other center\n    with 
a higher confidence exists within a radius r using a\n    bird-eye view distance metric.\n\n    Args:\n        dets (torch.Tensor): Detection results with the shape of [N, 3].\n        thresh (float): Value of threshold.\n        post_max_size (int, optional): Max number of prediction to be kept.\n            Defaults to 83.\n\n    Returns:\n        torch.Tensor: Indexes of the detections to be kept.\n    \"\"\"\n    x1 = dets[:, 0]\n    y1 = dets[:, 1]\n    scores = dets[:, 2]\n    order = scores.argsort()[::-1].astype(np.int32)  # highest->lowest\n    ndets = dets.shape[0]\n    suppressed = np.zeros((ndets), dtype=np.int32)\n    keep = []\n    for _i in range(ndets):\n        i = order[_i]  # start with highest score box\n        if suppressed[\n                i] == 1:  # if any box have enough iou with this, remove it\n            continue\n        keep.append(i)\n        for _j in range(_i + 1, ndets):\n            j = order[_j]\n            if suppressed[j] == 1:\n                continue\n            # calculate center distance between i and j box\n            dist = (x1[i] - x1[j])**2 + (y1[i] - y1[j])**2\n\n            # ovr = inter / areas[j]\n            if dist <= thresh:\n                suppressed[j] = 1\n\n    if post_max_size < len(keep):\n        return keep[:post_max_size]\n\n    return keep\n\n\n# This function duplicates functionality of mmcv.ops.iou_3d.nms_bev\n# from mmcv<=1.5, but using cuda ops from mmcv.ops.nms.nms_rotated.\n# Nms api will be unified in mmdetection3d one day.\ndef nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None,\n   xyxyr2xywhr=True):\n    \"\"\"NMS function GPU implementation (for BEV boxes). The overlap of two\n    boxes for IoU calculation is defined as the exact overlapping area of the\n    two boxes. In this function, one can also set ``pre_max_size`` and\n    ``post_max_size``.\n\n    Args:\n        boxes (torch.Tensor): Input boxes with the shape of [N, 5]\n            ([x1, y1, x2, y2, ry]).\n        scores (torch.Tensor): Scores of boxes with the shape of [N].\n        thresh (float): Overlap threshold of NMS.\n        pre_max_size (int, optional): Max size of boxes before NMS.\n            Default: None.\n        post_max_size (int, optional): Max size of boxes after NMS.\n            Default: None.\n\n    Returns:\n        torch.Tensor: Indexes after NMS.\n    \"\"\"\n    assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'\n    order = scores.sort(0, descending=True)[1]\n    if pre_max_size is not None:\n        order = order[:pre_max_size]\n    boxes = boxes[order].contiguous()\n    scores = scores[order]\n\n    # xyxyr -> back to xywhr\n    # note: better skip this step before nms_bev call in the future\n    if xyxyr2xywhr:\n        boxes = torch.stack(\n            ((boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2,\n             boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1], boxes[:, 4]),\n            dim=-1)\n\n    keep = nms_rotated(boxes, scores, thresh)[1]\n    keep = order[keep]\n    if post_max_size is not None:\n        keep = keep[:post_max_size]\n    return keep\n\n\n# This function duplicates functionality of mmcv.ops.iou_3d.nms_normal_bev\n# from mmcv<=1.5, but using cuda ops from mmcv.ops.nms.nms.\n# Nms api will be unified in mmdetection3d one day.\ndef nms_normal_bev(boxes, scores, thresh):\n    \"\"\"Normal NMS function GPU implementation (for BEV boxes). 
The overlap of\n    two boxes for IoU calculation is defined as the exact overlapping area of\n    the two boxes WITH their yaw angle set to 0.\n\n    Args:\n        boxes (torch.Tensor): Input boxes with shape (N, 5).\n        scores (torch.Tensor): Scores of predicted boxes with shape (N).\n        thresh (float): Overlap threshold of NMS.\n\n    Returns:\n        torch.Tensor: Remaining indices with scores in descending order.\n    \"\"\"\n    assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'\n    return nms(boxes[:, :-1], scores, thresh)[1]\n"
  },
  {
    "path": "mmdet3d/core/post_processing/merge_augs.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core.post_processing import nms_bev, nms_normal_bev\nfrom ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr\n\n\ndef merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):\n    \"\"\"Merge augmented detection 3D bboxes and scores.\n\n    Args:\n        aug_results (list[dict]): The dict of detection results.\n            The dict contains the following keys\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.\n            - scores_3d (torch.Tensor): Detection scores.\n            - labels_3d (torch.Tensor): Predicted box labels.\n        img_metas (list[dict]): Meta information of each sample.\n        test_cfg (dict): Test config.\n\n    Returns:\n        dict: Bounding boxes results in cpu mode, containing merged results.\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.\n            - scores_3d (torch.Tensor): Merged detection scores.\n            - labels_3d (torch.Tensor): Merged predicted box labels.\n    \"\"\"\n\n    assert len(aug_results) == len(img_metas), \\\n        '\"aug_results\" should have the same length as \"img_metas\", got len(' \\\n        f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'\n\n    recovered_bboxes = []\n    recovered_scores = []\n    recovered_labels = []\n\n    for bboxes, img_info in zip(aug_results, img_metas):\n        scale_factor = img_info[0]['pcd_scale_factor']\n        pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']\n        pcd_vertical_flip = img_info[0]['pcd_vertical_flip']\n        recovered_scores.append(bboxes['scores_3d'])\n        recovered_labels.append(bboxes['labels_3d'])\n        bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor,\n                                     pcd_horizontal_flip, pcd_vertical_flip)\n        recovered_bboxes.append(bboxes)\n\n    aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)\n    aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)\n    aug_scores = torch.cat(recovered_scores, dim=0)\n    aug_labels = torch.cat(recovered_labels, dim=0)\n\n    # TODO: use a more elegent way to deal with nms\n    if test_cfg.use_rotate_nms:\n        nms_func = nms_bev\n    else:\n        nms_func = nms_normal_bev\n\n    merged_bboxes = []\n    merged_scores = []\n    merged_labels = []\n\n    # Apply multi-class nms when merge bboxes\n    if len(aug_labels) == 0:\n        return bbox3d2result(aug_bboxes, aug_scores, aug_labels)\n\n    for class_id in range(torch.max(aug_labels).item() + 1):\n        class_inds = (aug_labels == class_id)\n        bboxes_i = aug_bboxes[class_inds]\n        bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]\n        scores_i = aug_scores[class_inds]\n        labels_i = aug_labels[class_inds]\n        if len(bboxes_nms_i) == 0:\n            continue\n        selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr)\n\n        merged_bboxes.append(bboxes_i[selected, :])\n        merged_scores.append(scores_i[selected])\n        merged_labels.append(labels_i[selected])\n\n    merged_bboxes = merged_bboxes[0].cat(merged_bboxes)\n    merged_scores = torch.cat(merged_scores, dim=0)\n    merged_labels = torch.cat(merged_labels, dim=0)\n\n    _, order = merged_scores.sort(0, descending=True)\n    num = min(test_cfg.max_num, len(aug_bboxes))\n    order = order[:num]\n\n    merged_bboxes = merged_bboxes[order]\n    merged_scores = merged_scores[order]\n    merged_labels = merged_labels[order]\n\n    return 
bbox3d2result(merged_bboxes, merged_scores, merged_labels)\n"
  },
  {
    "path": "mmdet3d/core/utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .array_converter import ArrayConverter, array_converter\nfrom .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d,\n                       gaussian_radius, get_ellip_gaussian_2D)\n\n__all__ = [\n    'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian',\n    'ArrayConverter', 'array_converter', 'ellip_gaussian2D',\n    'get_ellip_gaussian_2D'\n]\n"
  },
  {
    "path": "mmdet3d/core/utils/array_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport functools\nfrom inspect import getfullargspec\n\nimport numpy as np\nimport torch\n\n\ndef array_converter(to_torch=True,\n                    apply_to=tuple(),\n                    template_arg_name_=None,\n                    recover=True):\n    \"\"\"Wrapper function for data-type agnostic processing.\n\n    First converts input arrays to PyTorch tensors or NumPy ndarrays\n    for middle calculation, then convert output to original data-type if\n    `recover=True`.\n\n    Args:\n        to_torch (Bool, optional): Whether convert to PyTorch tensors\n            for middle calculation. Defaults to True.\n        apply_to (tuple[str], optional): The arguments to which we apply\n            data-type conversion. Defaults to an empty tuple.\n        template_arg_name_ (str, optional): Argument serving as the template (\n            return arrays should have the same dtype and device\n            as the template). Defaults to None. If None, we will use the\n            first argument in `apply_to` as the template argument.\n        recover (Bool, optional): Whether or not recover the wrapped function\n            outputs to the `template_arg_name_` type. Defaults to True.\n\n    Raises:\n        ValueError: When template_arg_name_ is not among all args, or\n            when apply_to contains an arg which is not among all args,\n            a ValueError will be raised. When the template argument or\n            an argument to convert is a list or tuple, and cannot be\n            converted to a NumPy array, a ValueError will be raised.\n        TypeError: When the type of the template argument or\n                an argument to convert does not belong to the above range,\n                or the contents of such an list-or-tuple-type argument\n                do not share the same data type, a TypeError is raised.\n\n    Returns:\n        (function): wrapped function.\n\n    Example:\n        >>> import torch\n        >>> import numpy as np\n        >>>\n        >>> # Use torch addition for a + b,\n        >>> # and convert return values to the type of a\n        >>> @array_converter(apply_to=('a', 'b'))\n        >>> def simple_add(a, b):\n        >>>     return a + b\n        >>>\n        >>> a = np.array([1.1])\n        >>> b = np.array([2.2])\n        >>> simple_add(a, b)\n        >>>\n        >>> # Use numpy addition for a + b,\n        >>> # and convert return values to the type of b\n        >>> @array_converter(to_torch=False, apply_to=('a', 'b'),\n        >>>                  template_arg_name_='b')\n        >>> def simple_add(a, b):\n        >>>     return a + b\n        >>>\n        >>> simple_add()\n        >>>\n        >>> # Use torch funcs for floor(a) if flag=True else ceil(a),\n        >>> # and return the torch tensor\n        >>> @array_converter(apply_to=('a',), recover=False)\n        >>> def floor_or_ceil(a, flag=True):\n        >>>     return torch.floor(a) if flag else torch.ceil(a)\n        >>>\n        >>> floor_or_ceil(a, flag=False)\n    \"\"\"\n\n    def array_converter_wrapper(func):\n        \"\"\"Outer wrapper for the function.\"\"\"\n\n        @functools.wraps(func)\n        def new_func(*args, **kwargs):\n            \"\"\"Inner wrapper for the arguments.\"\"\"\n            if len(apply_to) == 0:\n                return func(*args, **kwargs)\n\n            func_name = func.__name__\n\n            arg_spec = getfullargspec(func)\n\n            arg_names = arg_spec.args\n            arg_num = 
len(arg_names)\n            default_arg_values = arg_spec.defaults\n            if default_arg_values is None:\n                default_arg_values = []\n            no_default_arg_num = len(arg_names) - len(default_arg_values)\n\n            kwonly_arg_names = arg_spec.kwonlyargs\n            kwonly_default_arg_values = arg_spec.kwonlydefaults\n            if kwonly_default_arg_values is None:\n                kwonly_default_arg_values = {}\n\n            all_arg_names = arg_names + kwonly_arg_names\n\n            # in case there are args in the form of *args\n            if len(args) > arg_num:\n                named_args = args[:arg_num]\n                nameless_args = args[arg_num:]\n            else:\n                named_args = args\n                nameless_args = []\n\n            # template argument data type is used for all array-like arguments\n            if template_arg_name_ is None:\n                template_arg_name = apply_to[0]\n            else:\n                template_arg_name = template_arg_name_\n\n            if template_arg_name not in all_arg_names:\n                raise ValueError(f'{template_arg_name} is not among the '\n                                 f'argument list of function {func_name}')\n\n            # inspect apply_to\n            for arg_to_apply in apply_to:\n                if arg_to_apply not in all_arg_names:\n                    raise ValueError(f'{arg_to_apply} is not '\n                                     f'an argument of {func_name}')\n\n            new_args = []\n            new_kwargs = {}\n\n            converter = ArrayConverter()\n            target_type = torch.Tensor if to_torch else np.ndarray\n\n            # non-keyword arguments\n            for i, arg_value in enumerate(named_args):\n                if arg_names[i] in apply_to:\n                    new_args.append(\n                        converter.convert(\n                            input_array=arg_value, target_type=target_type))\n                else:\n                    new_args.append(arg_value)\n\n                if arg_names[i] == template_arg_name:\n                    template_arg_value = arg_value\n\n            kwonly_default_arg_values.update(kwargs)\n            kwargs = kwonly_default_arg_values\n\n            # keyword arguments and non-keyword arguments using default value\n            for i in range(len(named_args), len(all_arg_names)):\n                arg_name = all_arg_names[i]\n                if arg_name in kwargs:\n                    if arg_name in apply_to:\n                        new_kwargs[arg_name] = converter.convert(\n                            input_array=kwargs[arg_name],\n                            target_type=target_type)\n                    else:\n                        new_kwargs[arg_name] = kwargs[arg_name]\n                else:\n                    default_value = default_arg_values[i - no_default_arg_num]\n                    if arg_name in apply_to:\n                        new_kwargs[arg_name] = converter.convert(\n                            input_array=default_value, target_type=target_type)\n                    else:\n                        new_kwargs[arg_name] = default_value\n                if arg_name == template_arg_name:\n                    template_arg_value = kwargs[arg_name]\n\n            # add nameless args provided by *args (if exists)\n            new_args += nameless_args\n\n            return_values = func(*new_args, **new_kwargs)\n            converter.set_template(template_arg_value)\n\n            def 
recursive_recover(input_data):\n                if isinstance(input_data, (tuple, list)):\n                    new_data = []\n                    for item in input_data:\n                        new_data.append(recursive_recover(item))\n                    return tuple(new_data) if isinstance(input_data,\n                                                         tuple) else new_data\n                elif isinstance(input_data, dict):\n                    new_data = {}\n                    for k, v in input_data.items():\n                        new_data[k] = recursive_recover(v)\n                    return new_data\n                elif isinstance(input_data, (torch.Tensor, np.ndarray)):\n                    return converter.recover(input_data)\n                else:\n                    return input_data\n\n            if recover:\n                return recursive_recover(return_values)\n            else:\n                return return_values\n\n        return new_func\n\n    return array_converter_wrapper\n\n\nclass ArrayConverter:\n\n    SUPPORTED_NON_ARRAY_TYPES = (int, float, np.int8, np.int16, np.int32,\n                                 np.int64, np.uint8, np.uint16, np.uint32,\n                                 np.uint64, np.float16, np.float32, np.float64)\n\n    def __init__(self, template_array=None):\n        if template_array is not None:\n            self.set_template(template_array)\n\n    def set_template(self, array):\n        \"\"\"Set template array.\n\n        Args:\n            array (tuple | list | int | float | np.ndarray | torch.Tensor):\n                Template array.\n\n        Raises:\n            ValueError: If input is list or tuple and cannot be converted to\n                to a NumPy array, a ValueError is raised.\n            TypeError: If input type does not belong to the above range,\n                or the contents of a list or tuple do not share the\n                same data type, a TypeError is raised.\n        \"\"\"\n        self.array_type = type(array)\n        self.is_num = False\n        self.device = 'cpu'\n\n        if isinstance(array, np.ndarray):\n            self.dtype = array.dtype\n        elif isinstance(array, torch.Tensor):\n            self.dtype = array.dtype\n            self.device = array.device\n        elif isinstance(array, (list, tuple)):\n            try:\n                array = np.array(array)\n                if array.dtype not in self.SUPPORTED_NON_ARRAY_TYPES:\n                    raise TypeError\n                self.dtype = array.dtype\n            except (ValueError, TypeError):\n                print(f'The following list cannot be converted to'\n                      f' a numpy array of supported dtype:\\n{array}')\n                raise\n        elif isinstance(array, self.SUPPORTED_NON_ARRAY_TYPES):\n            self.array_type = np.ndarray\n            self.is_num = True\n            self.dtype = np.dtype(type(array))\n        else:\n            raise TypeError(f'Template type {self.array_type}'\n                            f' is not supported.')\n\n    def convert(self, input_array, target_type=None, target_array=None):\n        \"\"\"Convert input array to target data type.\n\n        Args:\n            input_array (tuple | list | np.ndarray |\n                torch.Tensor | int | float ):\n                Input array. Defaults to None.\n            target_type (<class 'np.ndarray'> | <class 'torch.Tensor'>,\n                optional):\n                Type to which input array is converted. 
Defaults to None.\n            target_array (np.ndarray | torch.Tensor, optional):\n                Template array to which input array is converted.\n                Defaults to None.\n\n        Raises:\n            ValueError: If input is list or tuple and cannot be converted to\n                to a NumPy array, a ValueError is raised.\n            TypeError: If input type does not belong to the above range,\n                or the contents of a list or tuple do not share the\n                same data type, a TypeError is raised.\n        \"\"\"\n        if isinstance(input_array, (list, tuple)):\n            try:\n                input_array = np.array(input_array)\n                if input_array.dtype not in self.SUPPORTED_NON_ARRAY_TYPES:\n                    raise TypeError\n            except (ValueError, TypeError):\n                print(f'The input cannot be converted to'\n                      f' a single-type numpy array:\\n{input_array}')\n                raise\n        elif isinstance(input_array, self.SUPPORTED_NON_ARRAY_TYPES):\n            input_array = np.array(input_array)\n        array_type = type(input_array)\n        assert target_type is not None or target_array is not None, \\\n            'must specify a target'\n        if target_type is not None:\n            assert target_type in (np.ndarray, torch.Tensor), \\\n                'invalid target type'\n            if target_type == array_type:\n                return input_array\n            elif target_type == np.ndarray:\n                # default dtype is float32\n                converted_array = input_array.cpu().numpy().astype(np.float32)\n            else:\n                # default dtype is float32, device is 'cpu'\n                converted_array = torch.tensor(\n                    input_array, dtype=torch.float32)\n        else:\n            assert isinstance(target_array, (np.ndarray, torch.Tensor)), \\\n                'invalid target array type'\n            if isinstance(target_array, array_type):\n                return input_array\n            elif isinstance(target_array, np.ndarray):\n                converted_array = input_array.cpu().numpy().astype(\n                    target_array.dtype)\n            else:\n                converted_array = target_array.new_tensor(input_array)\n        return converted_array\n\n    def recover(self, input_array):\n        assert isinstance(input_array, (np.ndarray, torch.Tensor)), \\\n            'invalid input array type'\n        if isinstance(input_array, self.array_type):\n            return input_array\n        elif isinstance(input_array, torch.Tensor):\n            converted_array = input_array.cpu().numpy().astype(self.dtype)\n        else:\n            converted_array = torch.tensor(\n                input_array, dtype=self.dtype, device=self.device)\n        if self.is_num:\n            converted_array = converted_array.item()\n        return converted_array\n"
  },
  {
    "path": "mmdet3d/core/utils/gaussian.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\n\ndef gaussian_2d(shape, sigma=1):\n    \"\"\"Generate gaussian map.\n\n    Args:\n        shape (list[int]): Shape of the map.\n        sigma (float, optional): Sigma to generate gaussian map.\n            Defaults to 1.\n\n    Returns:\n        np.ndarray: Generated gaussian map.\n    \"\"\"\n    m, n = [(ss - 1.) / 2. for ss in shape]\n    y, x = np.ogrid[-m:m + 1, -n:n + 1]\n\n    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))\n    h[h < np.finfo(h.dtype).eps * h.max()] = 0\n    return h\n\n\ndef draw_heatmap_gaussian(heatmap, center, radius, k=1):\n    \"\"\"Get gaussian masked heatmap.\n\n    Args:\n        heatmap (torch.Tensor): Heatmap to be masked.\n        center (torch.Tensor): Center coord of the heatmap.\n        radius (int): Radius of gaussian.\n        K (int, optional): Multiple of masked_gaussian. Defaults to 1.\n\n    Returns:\n        torch.Tensor: Masked heatmap.\n    \"\"\"\n    diameter = 2 * radius + 1\n    gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)\n\n    x, y = int(center[0]), int(center[1])\n\n    height, width = heatmap.shape[0:2]\n\n    left, right = min(x, radius), min(width - x, radius + 1)\n    top, bottom = min(y, radius), min(height - y, radius + 1)\n\n    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]\n    masked_gaussian = torch.from_numpy(\n        gaussian[radius - top:radius + bottom,\n                 radius - left:radius + right]).to(heatmap.device,\n                                                   torch.float32)\n    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:\n        torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)\n    return heatmap\n\n\ndef gaussian_radius(det_size, min_overlap=0.5):\n    \"\"\"Get radius of gaussian.\n\n    Args:\n        det_size (tuple[torch.Tensor]): Size of the detection result.\n        min_overlap (float, optional): Gaussian_overlap. Defaults to 0.5.\n\n    Returns:\n        torch.Tensor: Computed radius.\n    \"\"\"\n    height, width = det_size\n\n    a1 = 1\n    b1 = (height + width)\n    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)\n    sq1 = torch.sqrt(b1**2 - 4 * a1 * c1)\n    r1 = (b1 + sq1) / 2\n\n    a2 = 4\n    b2 = 2 * (height + width)\n    c2 = (1 - min_overlap) * width * height\n    sq2 = torch.sqrt(b2**2 - 4 * a2 * c2)\n    r2 = (b2 + sq2) / 2\n\n    a3 = 4 * min_overlap\n    b3 = -2 * min_overlap * (height + width)\n    c3 = (min_overlap - 1) * width * height\n    sq3 = torch.sqrt(b3**2 - 4 * a3 * c3)\n    r3 = (b3 + sq3) / 2\n    return min(r1, r2, r3)\n\n\ndef get_ellip_gaussian_2D(heatmap, center, radius_x, radius_y, k=1):\n    \"\"\"Generate 2D ellipse gaussian heatmap.\n\n    Args:\n        heatmap (Tensor): Input heatmap, the gaussian kernel will cover on\n            it and maintain the max value.\n        center (list[int]): Coord of gaussian kernel's center.\n        radius_x (int): X-axis radius of gaussian kernel.\n        radius_y (int): Y-axis radius of gaussian kernel.\n        k (int, optional): Coefficient of gaussian kernel. 
Default: 1.\n\n    Returns:\n        out_heatmap (Tensor): Updated heatmap covered by gaussian kernel.\n    \"\"\"\n    diameter_x, diameter_y = 2 * radius_x + 1, 2 * radius_y + 1\n    gaussian_kernel = ellip_gaussian2D((radius_x, radius_y),\n                                       sigma_x=diameter_x / 6,\n                                       sigma_y=diameter_y / 6,\n                                       dtype=heatmap.dtype,\n                                       device=heatmap.device)\n\n    x, y = int(center[0]), int(center[1])\n    height, width = heatmap.shape[0:2]\n\n    left, right = min(x, radius_x), min(width - x, radius_x + 1)\n    top, bottom = min(y, radius_y), min(height - y, radius_y + 1)\n\n    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]\n    masked_gaussian = gaussian_kernel[radius_y - top:radius_y + bottom,\n                                      radius_x - left:radius_x + right]\n    out_heatmap = heatmap\n    torch.max(\n        masked_heatmap,\n        masked_gaussian * k,\n        out=out_heatmap[y - top:y + bottom, x - left:x + right])\n\n    return out_heatmap\n\n\ndef ellip_gaussian2D(radius,\n                     sigma_x,\n                     sigma_y,\n                     dtype=torch.float32,\n                     device='cpu'):\n    \"\"\"Generate 2D ellipse gaussian kernel.\n\n    Args:\n        radius (tuple(int)): Ellipse radius (radius_x, radius_y) of gaussian\n            kernel.\n        sigma_x (int): X-axis sigma of gaussian function.\n        sigma_y (int): Y-axis sigma of gaussian function.\n        dtype (torch.dtype, optional): Dtype of gaussian tensor.\n            Default: torch.float32.\n        device (str, optional): Device of gaussian tensor.\n            Default: 'cpu'.\n\n    Returns:\n        h (Tensor): Gaussian kernel with a\n            ``(2 * radius_y + 1) * (2 * radius_x + 1)`` shape.\n    \"\"\"\n    x = torch.arange(\n        -radius[0], radius[0] + 1, dtype=dtype, device=device).view(1, -1)\n    y = torch.arange(\n        -radius[1], radius[1] + 1, dtype=dtype, device=device).view(-1, 1)\n\n    h = (-(x * x) / (2 * sigma_x * sigma_x) - (y * y) /\n         (2 * sigma_y * sigma_y)).exp()\n    h[h < torch.finfo(h.dtype).eps * h.max()] = 0\n\n    return h\n"
  },
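A minimal usage sketch for the heatmap utilities above (import path taken from this file's location): gaussian_radius expects the box height/width as torch scalars, and draw_heatmap_gaussian splats the kernel into the heatmap in place.

import torch

from mmdet3d.core.utils.gaussian import draw_heatmap_gaussian, gaussian_radius

heatmap = torch.zeros(128, 128)                 # single-class BEV heatmap
box_h, box_w = torch.tensor(10.0), torch.tensor(20.0)
radius = gaussian_radius((box_h, box_w), min_overlap=0.5)
radius = max(int(radius.item()), 2)             # clamp to a usable integer radius
center = torch.tensor([64.0, 64.0])             # (x, y) in heatmap cells
draw_heatmap_gaussian(heatmap, center, radius)  # modifies heatmap in place
assert heatmap.max() == 1.0                     # peak value sits at the center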
  {
    "path": "mmdet3d/core/visualizer/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .show_result import (show_multi_modality_result, show_result,\n                          show_seg_result)\n\n__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']\n"
  },
  {
    "path": "mmdet3d/core/visualizer/image_vis.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport cv2\nimport numpy as np\nimport torch\nfrom matplotlib import pyplot as plt\n\n\ndef project_pts_on_img(points,\n                       raw_img,\n                       lidar2img_rt,\n                       max_distance=70,\n                       thickness=-1):\n    \"\"\"Project the 3D points cloud on 2D image.\n\n    Args:\n        points (numpy.array): 3D points cloud (x, y, z) to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        max_distance (float, optional): the max distance of the points cloud.\n            Default: 70.\n        thickness (int, optional): The thickness of 2D points. Default: -1.\n    \"\"\"\n    img = raw_img.copy()\n    num_points = points.shape[0]\n    pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)\n    pts_2d = pts_4d @ lidar2img_rt.T\n\n    # cam_points is Tensor of Nx4 whose last column is 1\n    # transform camera coordinate to image coordinate\n    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)\n    pts_2d[:, 0] /= pts_2d[:, 2]\n    pts_2d[:, 1] /= pts_2d[:, 2]\n\n    fov_inds = ((pts_2d[:, 0] < img.shape[1])\n                & (pts_2d[:, 0] >= 0)\n                & (pts_2d[:, 1] < img.shape[0])\n                & (pts_2d[:, 1] >= 0))\n\n    imgfov_pts_2d = pts_2d[fov_inds, :3]  # u, v, d\n\n    cmap = plt.cm.get_cmap('hsv', 256)\n    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255\n    for i in range(imgfov_pts_2d.shape[0]):\n        depth = imgfov_pts_2d[i, 2]\n        color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]\n        cv2.circle(\n            img,\n            center=(int(np.round(imgfov_pts_2d[i, 0])),\n                    int(np.round(imgfov_pts_2d[i, 1]))),\n            radius=1,\n            color=tuple(color),\n            thickness=thickness,\n        )\n    cv2.imshow('project_pts_img', img.astype(np.uint8))\n    cv2.waitKey(100)\n\n\ndef plot_rect3d_on_img(img,\n                       num_rects,\n                       rect_corners,\n                       color=(0, 255, 0),\n                       thickness=1):\n    \"\"\"Plot the boundary lines of 3D rectangular on 2D images.\n\n    Args:\n        img (numpy.array): The numpy array of image.\n        num_rects (int): Number of 3D rectangulars.\n        rect_corners (numpy.array): Coordinates of the corners of 3D\n            rectangulars. Should be in the shape of [num_rect, 8, 2].\n        color (tuple[int], optional): The color to draw bboxes.\n            Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. 
Default: 1.\n    \"\"\"\n    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),\n                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))\n    for i in range(num_rects):\n        corners = rect_corners[i].astype(int)\n        for start, end in line_indices:\n            cv2.line(img, (corners[start, 0], corners[start, 1]),\n                     (corners[end, 0], corners[end, 1]), color, thickness,\n                     cv2.LINE_AA)\n\n    return img.astype(np.uint8)\n\n\ndef draw_lidar_bbox3d_on_img(bboxes3d,\n                             raw_img,\n                             lidar2img_rt,\n                             img_metas,\n                             color=(0, 255, 0),\n                             thickness=1):\n    \"\"\"Project the 3D bbox on 2D plane and draw on input image.\n\n    Args:\n        bboxes3d (:obj:`LiDARInstance3DBoxes`):\n            3d bbox in lidar coordinate system to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        img_metas (dict): Useless here.\n        color (tuple[int], optional): The color to draw bboxes.\n            Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. Default: 1.\n    \"\"\"\n    img = raw_img.copy()\n    corners_3d = bboxes3d.corners\n    num_bbox = corners_3d.shape[0]\n    pts_4d = np.concatenate(\n        [corners_3d.reshape(-1, 3),\n         np.ones((num_bbox * 8, 1))], axis=-1)\n    lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)\n    if isinstance(lidar2img_rt, torch.Tensor):\n        lidar2img_rt = lidar2img_rt.cpu().numpy()\n    pts_2d = pts_4d @ lidar2img_rt.T\n\n    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)\n    pts_2d[:, 0] /= pts_2d[:, 2]\n    pts_2d[:, 1] /= pts_2d[:, 2]\n    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)\n\n    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)\n\n\n# TODO: remove third parameter in all functions here in favour of img_metas\ndef draw_depth_bbox3d_on_img(bboxes3d,\n                             raw_img,\n                             calibs,\n                             img_metas,\n                             color=(0, 255, 0),\n                             thickness=1):\n    \"\"\"Project the 3D bbox on 2D plane and draw on input image.\n\n    Args:\n        bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):\n            3d bbox in depth coordinate system to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        calibs (dict): Camera calibration information, Rt and K.\n        img_metas (dict): Used in coordinates transformation.\n        color (tuple[int], optional): The color to draw bboxes.\n            Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. 
Default: 1.\n    \"\"\"\n    from mmdet3d.core.bbox import points_cam2img\n    from mmdet3d.models import apply_3d_transformation\n\n    img = raw_img.copy()\n    img_metas = copy.deepcopy(img_metas)\n    corners_3d = bboxes3d.corners\n    num_bbox = corners_3d.shape[0]\n    points_3d = corners_3d.reshape(-1, 3)\n\n    # first reverse the data transformations\n    xyz_depth = apply_3d_transformation(\n        points_3d, 'DEPTH', img_metas, reverse=True)\n\n    # project to 2d to get image coords (uv)\n    uv_origin = points_cam2img(xyz_depth,\n                               xyz_depth.new_tensor(img_metas['depth2img']))\n    uv_origin = (uv_origin - 1).round()\n    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()\n\n    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)\n\n\ndef draw_camera_bbox3d_on_img(bboxes3d,\n                              raw_img,\n                              cam2img,\n                              img_metas,\n                              color=(0, 255, 0),\n                              thickness=1):\n    \"\"\"Project the 3D bbox on 2D plane and draw on input image.\n\n    Args:\n        bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):\n            3d bbox in camera coordinate system to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        cam2img (dict): Camera intrinsic matrix,\n            denoted as `K` in depth bbox coordinate system.\n        img_metas (dict): Useless here.\n        color (tuple[int], optional): The color to draw bboxes.\n            Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. Default: 1.\n    \"\"\"\n    from mmdet3d.core.bbox import points_cam2img\n\n    img = raw_img.copy()\n    cam2img = copy.deepcopy(cam2img)\n    corners_3d = bboxes3d.corners\n    num_bbox = corners_3d.shape[0]\n    points_3d = corners_3d.reshape(-1, 3)\n    if not isinstance(cam2img, torch.Tensor):\n        cam2img = torch.from_numpy(np.array(cam2img))\n\n    assert (cam2img.shape == torch.Size([3, 3])\n            or cam2img.shape == torch.Size([4, 4]))\n    cam2img = cam2img.float().cpu()\n\n    # project to 2d to get image coords (uv)\n    uv_origin = points_cam2img(points_3d, cam2img)\n    uv_origin = (uv_origin - 1).round()\n    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()\n\n    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)\n"
  },
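A self-contained sketch of plot_rect3d_on_img from the module above, using synthetic, already-projected corners so no camera matrix or dataset is needed (all values are arbitrary):

import numpy as np

from mmdet3d.core.visualizer.image_vis import plot_rect3d_on_img

img = np.zeros((480, 640, 3), dtype=np.uint8)
# one box: 8 corner points already projected to pixel (u, v) coordinates
corners = np.array([[[100, 100], [200, 100], [200, 200], [100, 200],
                     [120, 120], [220, 120], [220, 220], [120, 220]]],
                   dtype=np.float32)
out = plot_rect3d_on_img(img, num_rects=1, rect_corners=corners,
                         color=(0, 255, 0), thickness=2)
print(out.shape, out.dtype)  # (480, 640, 3) uint8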
  {
    "path": "mmdet3d/core/visualizer/open3d_vis.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport numpy as np\nimport torch\n\ntry:\n    import open3d as o3d\n    from open3d import geometry\nexcept ImportError:\n    raise ImportError(\n        'Please run \"pip install open3d\" to install open3d first.')\n\n\ndef _draw_points(points,\n                 vis,\n                 points_size=2,\n                 point_color=(0.5, 0.5, 0.5),\n                 mode='xyz'):\n    \"\"\"Draw points on visualizer.\n\n    Args:\n        points (numpy.array | torch.tensor, shape=[N, 3+C]):\n            points to visualize.\n        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.\n        points_size (int, optional): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float], optional): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        mode (str, optional):  indicate type of the input points,\n            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.\n\n    Returns:\n        tuple: points, color of each point.\n    \"\"\"\n    vis.get_render_option().point_size = points_size  # set points size\n    if isinstance(points, torch.Tensor):\n        points = points.cpu().numpy()\n\n    points = points.copy()\n    pcd = geometry.PointCloud()\n    if mode == 'xyz':\n        pcd.points = o3d.utility.Vector3dVector(points[:, :3])\n        points_colors = np.tile(np.array(point_color), (points.shape[0], 1))\n    elif mode == 'xyzrgb':\n        pcd.points = o3d.utility.Vector3dVector(points[:, :3])\n        points_colors = points[:, 3:6]\n        # normalize to [0, 1] for open3d drawing\n        if not ((points_colors >= 0.0) & (points_colors <= 1.0)).all():\n            points_colors /= 255.0\n    else:\n        raise NotImplementedError\n\n    pcd.colors = o3d.utility.Vector3dVector(points_colors)\n    vis.add_geometry(pcd)\n\n    return pcd, points_colors\n\n\ndef _draw_bboxes(bbox3d,\n                 vis,\n                 points_colors,\n                 pcd=None,\n                 bbox_color=(0, 1, 0),\n                 points_in_box_color=(1, 0, 0),\n                 rot_axis=2,\n                 center_mode='lidar_bottom',\n                 mode='xyz'):\n    \"\"\"Draw bbox on visualizer and change the color of points inside bbox3d.\n\n    Args:\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.\n        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.\n        points_colors (numpy.array): color of each points.\n        pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.\n            Default: None.\n        bbox_color (tuple[float], optional): the color of bbox.\n            Default: (0, 1, 0).\n        points_in_box_color (tuple[float], optional):\n            the color of points inside bbox3d. Default: (1, 0, 0).\n        rot_axis (int, optional): rotation axis of bbox. Default: 2.\n        center_mode (bool, optional): indicate the center of bbox is\n            bottom center or gravity center. available mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str, optional):  indicate type of the input points,\n            available mode ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    if isinstance(bbox3d, torch.Tensor):\n        bbox3d = bbox3d.cpu().numpy()\n    bbox3d = bbox3d.copy()\n\n    in_box_color = np.array(points_in_box_color)\n    for i in range(len(bbox3d)):\n        center = bbox3d[i, 0:3]\n        dim = bbox3d[i, 3:6]\n        yaw = np.zeros(3)\n        yaw[rot_axis] = bbox3d[i, 6]\n        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)\n\n        if center_mode == 'lidar_bottom':\n            center[rot_axis] += dim[\n                rot_axis] / 2  # bottom center to gravity center\n        elif center_mode == 'camera_bottom':\n            center[rot_axis] -= dim[\n                rot_axis] / 2  # bottom center to gravity center\n        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)\n\n        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)\n        line_set.paint_uniform_color(bbox_color)\n        # draw bboxes on visualizer\n        vis.add_geometry(line_set)\n\n        # change the color of points which are in box\n        if pcd is not None and mode == 'xyz':\n            indices = box3d.get_point_indices_within_bounding_box(pcd.points)\n            points_colors[indices] = in_box_color\n\n    # update points colors\n    if pcd is not None:\n        pcd.colors = o3d.utility.Vector3dVector(points_colors)\n        vis.update_geometry(pcd)\n\n\ndef show_pts_boxes(points,\n                   bbox3d=None,\n                   show=True,\n                   save_path=None,\n                   points_size=2,\n                   point_color=(0.5, 0.5, 0.5),\n                   bbox_color=(0, 1, 0),\n                   points_in_box_color=(1, 0, 0),\n                   rot_axis=2,\n                   center_mode='lidar_bottom',\n                   mode='xyz'):\n    \"\"\"Draw bbox and points on visualizer.\n\n    Args:\n        points (numpy.array | torch.tensor, shape=[N, 3+C]):\n            points to visualize.\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):\n            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.\n            Defaults to None.\n        show (bool, optional): whether to show the visualization results.\n            Default: True.\n        save_path (str, optional): path to save visualized results.\n            Default: None.\n        points_size (int, optional): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float], optional): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        bbox_color (tuple[float], optional): the color of bbox.\n            Default: (0, 1, 0).\n        points_in_box_color (tuple[float], optional):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int, optional): rotation axis of bbox. Default: 2.\n        center_mode (bool, optional): indicate the center of bbox is bottom\n            center or gravity center. available mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str, optional):  indicate type of the input points, available\n            mode ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    # TODO: support score and class info\n    assert 0 <= rot_axis <= 2\n\n    # init visualizer\n    vis = o3d.visualization.Visualizer()\n    vis.create_window()\n    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n        size=1, origin=[0, 0, 0])  # create coordinate frame\n    vis.add_geometry(mesh_frame)\n\n    # draw points\n    pcd, points_colors = _draw_points(points, vis, points_size, point_color,\n                                      mode)\n\n    # draw boxes\n    if bbox3d is not None:\n        _draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,\n                     points_in_box_color, rot_axis, center_mode, mode)\n\n    if show:\n        vis.run()\n\n    if save_path is not None:\n        vis.capture_screen_image(save_path)\n\n    vis.destroy_window()\n\n\ndef _draw_bboxes_ind(bbox3d,\n                     vis,\n                     indices,\n                     points_colors,\n                     pcd=None,\n                     bbox_color=(0, 1, 0),\n                     points_in_box_color=(1, 0, 0),\n                     rot_axis=2,\n                     center_mode='lidar_bottom',\n                     mode='xyz'):\n    \"\"\"Draw bbox on visualizer and change the color or points inside bbox3d\n    with indices.\n\n    Args:\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.\n        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.\n        indices (numpy.array | torch.tensor, shape=[N, M]):\n            indicate which bbox3d that each point lies in.\n        points_colors (numpy.array): color of each points.\n        pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.\n            Default: None.\n        bbox_color (tuple[float], optional): the color of bbox.\n            Default: (0, 1, 0).\n        points_in_box_color (tuple[float], optional):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int, optional): rotation axis of bbox. Default: 2.\n        center_mode (bool, optional): indicate the center of bbox is\n            bottom center or gravity center. available mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str, optional):  indicate type of the input points,\n            available mode ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    if isinstance(bbox3d, torch.Tensor):\n        bbox3d = bbox3d.cpu().numpy()\n    if isinstance(indices, torch.Tensor):\n        indices = indices.cpu().numpy()\n    bbox3d = bbox3d.copy()\n\n    in_box_color = np.array(points_in_box_color)\n    for i in range(len(bbox3d)):\n        center = bbox3d[i, 0:3]\n        dim = bbox3d[i, 3:6]\n        yaw = np.zeros(3)\n        # TODO: fix problem of current coordinate system\n        # dim[0], dim[1] = dim[1], dim[0]  # for current coordinate\n        # yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)\n        yaw[rot_axis] = -bbox3d[i, 6]\n        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)\n        if center_mode == 'lidar_bottom':\n            center[rot_axis] += dim[\n                rot_axis] / 2  # bottom center to gravity center\n        elif center_mode == 'camera_bottom':\n            center[rot_axis] -= dim[\n                rot_axis] / 2  # bottom center to gravity center\n        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)\n\n        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)\n        line_set.paint_uniform_color(bbox_color)\n        # draw bboxes on visualizer\n        vis.add_geometry(line_set)\n\n        # change the color of points which are in box\n        if pcd is not None and mode == 'xyz':\n            points_colors[indices[:, i].astype(bool)] = in_box_color\n\n    # update points colors\n    if pcd is not None:\n        pcd.colors = o3d.utility.Vector3dVector(points_colors)\n        vis.update_geometry(pcd)\n\n\ndef show_pts_index_boxes(points,\n                         bbox3d=None,\n                         show=True,\n                         indices=None,\n                         save_path=None,\n                         points_size=2,\n                         point_color=(0.5, 0.5, 0.5),\n                         bbox_color=(0, 1, 0),\n                         points_in_box_color=(1, 0, 0),\n                         rot_axis=2,\n                         center_mode='lidar_bottom',\n                         mode='xyz'):\n    \"\"\"Draw bbox and points on visualizer with indices that indicate which\n    bbox3d that each point lies in.\n\n    Args:\n        points (numpy.array | torch.tensor, shape=[N, 3+C]):\n            points to visualize.\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.\n            Defaults to None.\n        show (bool, optional): whether to show the visualization results.\n            Default: True.\n        indices (numpy.array | torch.tensor, shape=[N, M], optional):\n            indicate which bbox3d that each point lies in. Default: None.\n        save_path (str, optional): path to save visualized results.\n            Default: None.\n        points_size (int, optional): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float], optional): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        bbox_color (tuple[float], optional): the color of bbox.\n            Default: (0, 1, 0).\n        points_in_box_color (tuple[float], optional):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int, optional): rotation axis of bbox. Default: 2.\n        center_mode (bool, optional): indicate the center of bbox is\n            bottom center or gravity center. available mode\n            ['lidar_bottom', 'camera_bottom']. 
Default: 'lidar_bottom'.\n        mode (str, optional):  indicate type of the input points,\n            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.\n    \"\"\"\n    # TODO: support score and class info\n    assert 0 <= rot_axis <= 2\n\n    # init visualizer\n    vis = o3d.visualization.Visualizer()\n    vis.create_window()\n    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n        size=1, origin=[0, 0, 0])  # create coordinate frame\n    vis.add_geometry(mesh_frame)\n\n    # draw points\n    pcd, points_colors = _draw_points(points, vis, points_size, point_color,\n                                      mode)\n\n    # draw boxes\n    if bbox3d is not None:\n        _draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,\n                         points_in_box_color, rot_axis, center_mode, mode)\n\n    if show:\n        vis.run()\n\n    if save_path is not None:\n        vis.capture_screen_image(save_path)\n\n    vis.destroy_window()\n\n\nclass Visualizer(object):\n    r\"\"\"Online visualizer implemented with Open3d.\n\n    Args:\n        points (numpy.array, shape=[N, 3+C]): Points to visualize. The Points\n            cloud is in mode of Coord3DMode.DEPTH (please refer to\n            core.structures.coord_3d_mode).\n        bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox\n            (x, y, z, x_size, y_size, z_size, yaw) to visualize.\n            The 3D bbox is in mode of Box3DMode.DEPTH with\n            gravity_center (please refer to core.structures.box_3d_mode).\n            Default: None.\n        save_path (str, optional): path to save visualized results.\n            Default: None.\n        points_size (int, optional): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float], optional): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        bbox_color (tuple[float], optional): the color of bbox.\n            Default: (0, 1, 0).\n        points_in_box_color (tuple[float], optional):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int, optional): rotation axis of bbox. Default: 2.\n        center_mode (bool, optional): indicate the center of bbox is\n            bottom center or gravity center. available mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str, optional):  indicate type of the input points,\n            available mode ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n\n    def __init__(self,\n                 points,\n                 bbox3d=None,\n                 save_path=None,\n                 points_size=2,\n                 point_color=(0.5, 0.5, 0.5),\n                 bbox_color=(0, 1, 0),\n                 points_in_box_color=(1, 0, 0),\n                 rot_axis=2,\n                 center_mode='lidar_bottom',\n                 mode='xyz'):\n        super(Visualizer, self).__init__()\n        assert 0 <= rot_axis <= 2\n\n        # init visualizer\n        self.o3d_visualizer = o3d.visualization.Visualizer()\n        self.o3d_visualizer.create_window()\n        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n            size=1, origin=[0, 0, 0])  # create coordinate frame\n        self.o3d_visualizer.add_geometry(mesh_frame)\n\n        self.points_size = points_size\n        self.point_color = point_color\n        self.bbox_color = bbox_color\n        self.points_in_box_color = points_in_box_color\n        self.rot_axis = rot_axis\n        self.center_mode = center_mode\n        self.mode = mode\n        self.seg_num = 0\n\n        # draw points\n        if points is not None:\n            self.pcd, self.points_colors = _draw_points(\n                points, self.o3d_visualizer, points_size, point_color, mode)\n\n        # draw boxes\n        if bbox3d is not None:\n            _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,\n                         self.pcd, bbox_color, points_in_box_color, rot_axis,\n                         center_mode, mode)\n\n    def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):\n        \"\"\"Add bounding box to visualizer.\n\n        Args:\n            bbox3d (numpy.array, shape=[M, 7]):\n                3D bbox (x, y, z, x_size, y_size, z_size, yaw)\n                to be visualized. The 3d bbox is in mode of\n                Box3DMode.DEPTH with gravity_center (please refer to\n                core.structures.box_3d_mode).\n            bbox_color (tuple[float]): the color of bbox. Default: None.\n            points_in_box_color (tuple[float]): the color of points which\n                are in bbox3d. 
Default: None.\n        \"\"\"\n        if bbox_color is None:\n            bbox_color = self.bbox_color\n        if points_in_box_color is None:\n            points_in_box_color = self.points_in_box_color\n        _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,\n                     bbox_color, points_in_box_color, self.rot_axis,\n                     self.center_mode, self.mode)\n\n    def add_seg_mask(self, seg_mask_colors):\n        \"\"\"Add segmentation mask to visualizer via per-point colorization.\n\n        Args:\n            seg_mask_colors (numpy.array, shape=[N, 6]):\n                The segmentation mask whose first 3 dims are point coordinates\n                and last 3 dims are converted colors.\n        \"\"\"\n        # we can't draw the colors on existing points\n        # in case gt and pred mask would overlap\n        # instead we set a large offset along x-axis for each seg mask\n        self.seg_num += 1\n        offset = (np.array(self.pcd.points).max(0) -\n                  np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num\n        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n            size=1, origin=[offset, 0, 0])  # create coordinate frame for seg\n        self.o3d_visualizer.add_geometry(mesh_frame)\n        seg_points = copy.deepcopy(seg_mask_colors)\n        seg_points[:, 0] += offset\n        _draw_points(\n            seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')\n\n    def show(self, save_path=None):\n        \"\"\"Visualize the points cloud.\n\n        Args:\n            save_path (str, optional): path to save image. Default: None.\n        \"\"\"\n\n        self.o3d_visualizer.run()\n\n        if save_path is not None:\n            self.o3d_visualizer.capture_screen_image(save_path)\n\n        self.o3d_visualizer.destroy_window()\n        return\n"
  },
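A quick interactive check of show_pts_boxes above (requires open3d and a display); the point cloud and the single (x, y, z, x_size, y_size, z_size, yaw) box are synthetic:

import numpy as np

from mmdet3d.core.visualizer.open3d_vis import show_pts_boxes

points = np.random.rand(1000, 3) * 10 - 5                # N x 3 cloud around origin
boxes = np.array([[0.0, 0.0, 0.0, 2.0, 2.0, 1.5, 0.0]])  # M x 7 bottom-center boxes
show_pts_boxes(points, bbox3d=boxes, show=True, save_path=None)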
  {
    "path": "mmdet3d/core/visualizer/show_result.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport trimesh\n\nfrom .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,\n                        draw_lidar_bbox3d_on_img)\n\n\ndef _write_obj(points, out_filename):\n    \"\"\"Write points into ``obj`` format for meshlab visualization.\n\n    Args:\n        points (np.ndarray): Points in shape (N, dim).\n        out_filename (str): Filename to be saved.\n    \"\"\"\n    N = points.shape[0]\n    fout = open(out_filename, 'w')\n    for i in range(N):\n        if points.shape[1] == 6:\n            c = points[i, 3:].astype(int)\n            fout.write(\n                'v %f %f %f %d %d %d\\n' %\n                (points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))\n\n        else:\n            fout.write('v %f %f %f\\n' %\n                       (points[i, 0], points[i, 1], points[i, 2]))\n    fout.close()\n\n\ndef _write_oriented_bbox(scene_bbox, out_filename):\n    \"\"\"Export oriented (around Z axis) scene bbox to meshes.\n\n    Args:\n        scene_bbox(list[ndarray] or ndarray): xyz pos of center and\n            3 lengths (x_size, y_size, z_size) and heading angle around Z axis.\n            Y forward, X right, Z upward. heading angle of positive X is 0,\n            heading angle of positive Y is 90 degrees.\n        out_filename(str): Filename.\n    \"\"\"\n\n    def heading2rotmat(heading_angle):\n        rotmat = np.zeros((3, 3))\n        rotmat[2, 2] = 1\n        cosval = np.cos(heading_angle)\n        sinval = np.sin(heading_angle)\n        rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])\n        return rotmat\n\n    def convert_oriented_box_to_trimesh_fmt(box):\n        ctr = box[:3]\n        lengths = box[3:6]\n        trns = np.eye(4)\n        trns[0:3, 3] = ctr\n        trns[3, 3] = 1.0\n        trns[0:3, 0:3] = heading2rotmat(box[6])\n        box_trimesh_fmt = trimesh.creation.box(lengths, trns)\n        return box_trimesh_fmt\n\n    if len(scene_bbox) == 0:\n        scene_bbox = np.zeros((1, 7))\n    scene = trimesh.scene.Scene()\n    for box in scene_bbox:\n        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))\n\n    mesh_list = trimesh.util.concatenate(scene.dump())\n    # save to obj file\n    trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')\n\n    return\n\n\ndef show_result(points,\n                gt_bboxes,\n                pred_bboxes,\n                out_dir,\n                filename,\n                show=False,\n                snapshot=False,\n                pred_labels=None):\n    \"\"\"Convert results into format that is directly readable for meshlab.\n\n    Args:\n        points (np.ndarray): Points.\n        gt_bboxes (np.ndarray): Ground truth boxes.\n        pred_bboxes (np.ndarray): Predicted boxes.\n        out_dir (str): Path of output directory\n        filename (str): Filename of the current frame.\n        show (bool, optional): Visualize the results online. 
Defaults to False.\n        snapshot (bool, optional): Whether to save the online results.\n            Defaults to False.\n        pred_labels (np.ndarray, optional): Predicted labels of boxes.\n            Defaults to None.\n    \"\"\"\n    result_path = osp.join(out_dir, filename)\n    mmcv.mkdir_or_exist(result_path)\n\n    if show:\n        from .open3d_vis import Visualizer\n\n        vis = Visualizer(points)\n        if pred_bboxes is not None:\n            if pred_labels is None:\n                vis.add_bboxes(bbox3d=pred_bboxes)\n            else:\n                palette = np.random.randint(\n                    0, 255, size=(pred_labels.max() + 1, 3)) / 256\n                labelDict = {}\n                for j in range(len(pred_labels)):\n                    i = int(pred_labels[j].numpy())\n                    if labelDict.get(i) is None:\n                        labelDict[i] = []\n                    labelDict[i].append(pred_bboxes[j])\n                for i in labelDict:\n                    vis.add_bboxes(\n                        bbox3d=np.array(labelDict[i]),\n                        bbox_color=palette[i],\n                        points_in_box_color=palette[i])\n\n        if gt_bboxes is not None:\n            vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))\n        show_path = osp.join(result_path,\n                             f'{filename}_online.png') if snapshot else None\n        vis.show(show_path)\n\n    if points is not None:\n        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))\n\n    if gt_bboxes is not None:\n        # bottom center to gravity center\n        gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2\n\n        _write_oriented_bbox(gt_bboxes,\n                             osp.join(result_path, f'{filename}_gt.obj'))\n\n    if pred_bboxes is not None:\n        # bottom center to gravity center\n        pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2\n\n        _write_oriented_bbox(pred_bboxes,\n                             osp.join(result_path, f'{filename}_pred.obj'))\n\n\ndef show_seg_result(points,\n                    gt_seg,\n                    pred_seg,\n                    out_dir,\n                    filename,\n                    palette,\n                    ignore_index=None,\n                    show=False,\n                    snapshot=False):\n    \"\"\"Convert results into format that is directly readable for meshlab.\n\n    Args:\n        points (np.ndarray): Points.\n        gt_seg (np.ndarray): Ground truth segmentation mask.\n        pred_seg (np.ndarray): Predicted segmentation mask.\n        out_dir (str): Path of output directory\n        filename (str): Filename of the current frame.\n        palette (np.ndarray): Mapping between class labels and colors.\n        ignore_index (int, optional): The label index to be ignored, e.g.\n            unannotated points. Defaults to None.\n        show (bool, optional): Visualize the results online. 
Defaults to False.\n        snapshot (bool, optional): Whether to save the online results.\n            Defaults to False.\n    \"\"\"\n    # we need 3D coordinates to visualize segmentation mask\n    if gt_seg is not None or pred_seg is not None:\n        assert points is not None, \\\n            '3D coordinates are required for segmentation visualization'\n\n    # filter out ignored points\n    if gt_seg is not None and ignore_index is not None:\n        if points is not None:\n            points = points[gt_seg != ignore_index]\n        if pred_seg is not None:\n            pred_seg = pred_seg[gt_seg != ignore_index]\n        gt_seg = gt_seg[gt_seg != ignore_index]\n\n    if gt_seg is not None:\n        gt_seg_color = palette[gt_seg]\n        gt_seg_color = np.concatenate([points[:, :3], gt_seg_color], axis=1)\n    if pred_seg is not None:\n        pred_seg_color = palette[pred_seg]\n        pred_seg_color = np.concatenate([points[:, :3], pred_seg_color],\n                                        axis=1)\n\n    result_path = osp.join(out_dir, filename)\n    mmcv.mkdir_or_exist(result_path)\n\n    # online visualization of segmentation mask\n    # we show three masks in a row, scene_points, gt_mask, pred_mask\n    if show:\n        from .open3d_vis import Visualizer\n        mode = 'xyzrgb' if points.shape[1] == 6 else 'xyz'\n        vis = Visualizer(points, mode=mode)\n        if gt_seg is not None:\n            vis.add_seg_mask(gt_seg_color)\n        if pred_seg is not None:\n            vis.add_seg_mask(pred_seg_color)\n        show_path = osp.join(result_path,\n                             f'{filename}_online.png') if snapshot else None\n        vis.show(show_path)\n\n    if points is not None:\n        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))\n\n    if gt_seg is not None:\n        _write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))\n\n    if pred_seg is not None:\n        _write_obj(pred_seg_color, osp.join(result_path,\n                                            f'{filename}_pred.obj'))\n\n\ndef show_multi_modality_result(img,\n                               gt_bboxes,\n                               pred_bboxes,\n                               proj_mat,\n                               out_dir,\n                               filename,\n                               box_mode='lidar',\n                               img_metas=None,\n                               show=False,\n                               gt_bbox_color=(61, 102, 255),\n                               pred_bbox_color=(241, 101, 72)):\n    \"\"\"Convert multi-modality detection results into 2D results.\n\n    Project the predicted 3D bbox to 2D image plane and visualize them.\n\n    Args:\n        img (np.ndarray): The numpy array of image in cv2 fashion.\n        gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.\n        pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.\n        proj_mat (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        out_dir (str): Path of output directory.\n        filename (str): Filename of the current frame.\n        box_mode (str, optional): Coordinate system the boxes are in.\n            Should be one of 'depth', 'lidar' and 'camera'.\n            Defaults to 'lidar'.\n        img_metas (dict, optional): Used in projecting depth bbox.\n            Defaults to None.\n        show (bool, optional): Visualize the results online. 
Defaults to False.\n        gt_bbox_color (str or tuple(int), optional): Color of bbox lines.\n           The tuple of color should be in BGR order. Default: (255, 102, 61).\n        pred_bbox_color (str or tuple(int), optional): Color of bbox lines.\n           The tuple of color should be in BGR order. Default: (72, 101, 241).\n    \"\"\"\n    if box_mode == 'depth':\n        draw_bbox = draw_depth_bbox3d_on_img\n    elif box_mode == 'lidar':\n        draw_bbox = draw_lidar_bbox3d_on_img\n    elif box_mode == 'camera':\n        draw_bbox = draw_camera_bbox3d_on_img\n    else:\n        raise NotImplementedError(f'unsupported box mode {box_mode}')\n\n    result_path = osp.join(out_dir, filename)\n    mmcv.mkdir_or_exist(result_path)\n\n    if show:\n        show_img = img.copy()\n        if gt_bboxes is not None:\n            show_img = draw_bbox(\n                gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)\n        if pred_bboxes is not None:\n            show_img = draw_bbox(\n                pred_bboxes,\n                show_img,\n                proj_mat,\n                img_metas,\n                color=pred_bbox_color)\n        mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)\n\n    if img is not None:\n        mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))\n\n    if gt_bboxes is not None:\n        gt_img = draw_bbox(\n            gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)\n        mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))\n\n    if pred_bboxes is not None:\n        pred_img = draw_bbox(\n            pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)\n        mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))\n"
  },
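An offline sketch of show_result above: with show=False it only writes .obj meshes for meshlab, so no open3d window is needed; the output directory and filename are arbitrary (mmcv and trimesh must be installed):

import numpy as np

from mmdet3d.core.visualizer import show_result

points = np.random.rand(500, 3).astype(np.float32) * 10
gt = np.array([[2.0, 2.0, 0.0, 1.0, 2.0, 1.5, 0.0]])   # (x, y, z, dx, dy, dz, yaw)
pred = np.array([[2.1, 1.9, 0.0, 1.0, 2.0, 1.5, 0.1]])
show_result(points, gt, pred, out_dir='./vis_demo', filename='sample_0000')
# writes sample_0000_points.obj, sample_0000_gt.obj and sample_0000_pred.obj
# under ./vis_demo/sample_0000/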
  {
    "path": "mmdet3d/core/voxel/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .builder import build_voxel_generator\nfrom .voxel_generator import VoxelGenerator\n\n__all__ = ['build_voxel_generator', 'VoxelGenerator']\n"
  },
  {
    "path": "mmdet3d/core/voxel/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\n\nfrom . import voxel_generator\n\n\ndef build_voxel_generator(cfg, **kwargs):\n    \"\"\"Builder of voxel generator.\"\"\"\n    if isinstance(cfg, voxel_generator.VoxelGenerator):\n        return cfg\n    elif isinstance(cfg, dict):\n        return mmcv.runner.obj_from_dict(\n            cfg, voxel_generator, default_args=kwargs)\n    else:\n        raise TypeError('Invalid type {} for building a sampler'.format(\n            type(cfg)))\n"
  },
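build_voxel_generator accepts either a ready VoxelGenerator instance or a config dict whose 'type' names a class in voxel_generator and whose remaining keys mirror that class's constructor; a minimal sketch with KITTI-style settings:

from mmdet3d.core.voxel import build_voxel_generator

voxel_cfg = dict(
    type='VoxelGenerator',
    voxel_size=[0.05, 0.05, 0.1],
    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    max_num_points=5,
    max_voxels=20000)
voxel_generator = build_voxel_generator(voxel_cfg)
print(voxel_generator)  # repr lists voxel_size, range and the derived grid_size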
  {
    "path": "mmdet3d/core/voxel/voxel_generator.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numba\nimport numpy as np\n\n\nclass VoxelGenerator(object):\n    \"\"\"Voxel generator in numpy implementation.\n\n    Args:\n        voxel_size (list[float]): Size of a single voxel\n        point_cloud_range (list[float]): Range of points\n        max_num_points (int): Maximum number of points in a single voxel\n        max_voxels (int, optional): Maximum number of voxels.\n            Defaults to 20000.\n    \"\"\"\n\n    def __init__(self,\n                 voxel_size,\n                 point_cloud_range,\n                 max_num_points,\n                 max_voxels=20000):\n\n        point_cloud_range = np.array(point_cloud_range, dtype=np.float32)\n        # [0, -40, -3, 70.4, 40, 1]\n        voxel_size = np.array(voxel_size, dtype=np.float32)\n        grid_size = (point_cloud_range[3:] -\n                     point_cloud_range[:3]) / voxel_size\n        grid_size = np.round(grid_size).astype(np.int64)\n\n        self._voxel_size = voxel_size\n        self._point_cloud_range = point_cloud_range\n        self._max_num_points = max_num_points\n        self._max_voxels = max_voxels\n        self._grid_size = grid_size\n\n    def generate(self, points):\n        \"\"\"Generate voxels given points.\"\"\"\n        return points_to_voxel(points, self._voxel_size,\n                               self._point_cloud_range, self._max_num_points,\n                               True, self._max_voxels)\n\n    @property\n    def voxel_size(self):\n        \"\"\"list[float]: Size of a single voxel.\"\"\"\n        return self._voxel_size\n\n    @property\n    def max_num_points_per_voxel(self):\n        \"\"\"int: Maximum number of points per voxel.\"\"\"\n        return self._max_num_points\n\n    @property\n    def point_cloud_range(self):\n        \"\"\"list[float]: Range of point cloud.\"\"\"\n        return self._point_cloud_range\n\n    @property\n    def grid_size(self):\n        \"\"\"np.ndarray: The size of grids.\"\"\"\n        return self._grid_size\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        indent = ' ' * (len(repr_str) + 1)\n        repr_str += f'(voxel_size={self._voxel_size},\\n'\n        repr_str += indent + 'point_cloud_range='\n        repr_str += f'{self._point_cloud_range.tolist()},\\n'\n        repr_str += indent + f'max_num_points={self._max_num_points},\\n'\n        repr_str += indent + f'max_voxels={self._max_voxels},\\n'\n        repr_str += indent + f'grid_size={self._grid_size.tolist()}'\n        repr_str += ')'\n        return repr_str\n\n\ndef points_to_voxel(points,\n                    voxel_size,\n                    coors_range,\n                    max_points=35,\n                    reverse_index=True,\n                    max_voxels=20000):\n    \"\"\"convert kitti points(N, >=3) to voxels.\n\n    Args:\n        points (np.ndarray): [N, ndim]. 
points[:, :3] contain xyz points and\n            points[:, 3:] contain other information such as reflectivity.\n        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size\n        coors_range (list[float | tuple[float] | ndarray]): Voxel range.\n            format: xyzxyz, minmax\n        max_points (int): Indicate maximum points contained in a voxel.\n        reverse_index (bool): Whether return reversed coordinates.\n            if points has xyz format and reverse_index is True, output\n            coordinates will be zyx format, but points in features always\n            xyz format.\n        max_voxels (int): Maximum number of voxels this function creates.\n            For second, 20000 is a good choice. Points should be shuffled for\n            randomness before this function because max_voxels drops points.\n\n    Returns:\n        tuple[np.ndarray]:\n            voxels: [M, max_points, ndim] float tensor. only contain points.\n            coordinates: [M, 3] int32 tensor.\n            num_points_per_voxel: [M] int32 tensor.\n    \"\"\"\n    if not isinstance(voxel_size, np.ndarray):\n        voxel_size = np.array(voxel_size, dtype=points.dtype)\n    if not isinstance(coors_range, np.ndarray):\n        coors_range = np.array(coors_range, dtype=points.dtype)\n    voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size\n    voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())\n    if reverse_index:\n        voxelmap_shape = voxelmap_shape[::-1]\n    # don't create large array in jit(nopython=True) code.\n    num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)\n    coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)\n    voxels = np.zeros(\n        shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)\n    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)\n    if reverse_index:\n        voxel_num = _points_to_voxel_reverse_kernel(\n            points, voxel_size, coors_range, num_points_per_voxel,\n            coor_to_voxelidx, voxels, coors, max_points, max_voxels)\n\n    else:\n        voxel_num = _points_to_voxel_kernel(points, voxel_size, coors_range,\n                                            num_points_per_voxel,\n                                            coor_to_voxelidx, voxels, coors,\n                                            max_points, max_voxels)\n\n    coors = coors[:voxel_num]\n    voxels = voxels[:voxel_num]\n    num_points_per_voxel = num_points_per_voxel[:voxel_num]\n\n    return voxels, coors, num_points_per_voxel\n\n\n@numba.jit(nopython=True)\ndef _points_to_voxel_reverse_kernel(points,\n                                    voxel_size,\n                                    coors_range,\n                                    num_points_per_voxel,\n                                    coor_to_voxelidx,\n                                    voxels,\n                                    coors,\n                                    max_points=35,\n                                    max_voxels=20000):\n    \"\"\"convert kitti points(N, >=3) to voxels.\n\n    Args:\n        points (np.ndarray): [N, ndim]. 
points[:, :3] contain xyz points and\n            points[:, 3:] contain other information such as reflectivity.\n        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size\n        coors_range (list[float | tuple[float] | ndarray]): Range of voxels.\n            format: xyzxyz, minmax\n        num_points_per_voxel (int): Number of points per voxel.\n        coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W),\n            which has the same shape as the complete voxel map. It indicates\n            the index of each corresponding voxel.\n        voxels (np.ndarray): Created empty voxels.\n        coors (np.ndarray): Created coordinates of each voxel.\n        max_points (int): Indicate maximum points contained in a voxel.\n        max_voxels (int): Maximum number of voxels this function create.\n            for second, 20000 is a good choice. Points should be shuffled for\n            randomness before this function because max_voxels drops points.\n\n    Returns:\n        tuple[np.ndarray]:\n            voxels: Shape [M, max_points, ndim], only contain points.\n            coordinates: Shape [M, 3].\n            num_points_per_voxel: Shape [M].\n    \"\"\"\n    # put all computations to one loop.\n    # we shouldn't create large array in main jit code, otherwise\n    # reduce performance\n    N = points.shape[0]\n    # ndim = points.shape[1] - 1\n    ndim = 3\n    ndim_minus_1 = ndim - 1\n    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n    # np.round(grid_size)\n    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n    coor = np.zeros(shape=(3, ), dtype=np.int32)\n    voxel_num = 0\n    failed = False\n    for i in range(N):\n        failed = False\n        for j in range(ndim):\n            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n            if c < 0 or c >= grid_size[j]:\n                failed = True\n                break\n            coor[ndim_minus_1 - j] = c\n        if failed:\n            continue\n        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n        if voxelidx == -1:\n            voxelidx = voxel_num\n            if voxel_num >= max_voxels:\n                continue\n            voxel_num += 1\n            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n            coors[voxelidx] = coor\n        num = num_points_per_voxel[voxelidx]\n        if num < max_points:\n            voxels[voxelidx, num] = points[i]\n            num_points_per_voxel[voxelidx] += 1\n    return voxel_num\n\n\n@numba.jit(nopython=True)\ndef _points_to_voxel_kernel(points,\n                            voxel_size,\n                            coors_range,\n                            num_points_per_voxel,\n                            coor_to_voxelidx,\n                            voxels,\n                            coors,\n                            max_points=35,\n                            max_voxels=20000):\n    \"\"\"convert kitti points(N, >=3) to voxels.\n\n    Args:\n        points (np.ndarray): [N, ndim]. 
points[:, :3] contain xyz points and\n            points[:, 3:] contain other information such as reflectivity.\n        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size.\n        coors_range (list[float | tuple[float] | ndarray]): Range of voxels.\n            format: xyzxyz, minmax\n        num_points_per_voxel (int): Number of points per voxel.\n        coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W),\n            which has the same shape as the complete voxel map. It indicates\n            the index of each corresponding voxel.\n        voxels (np.ndarray): Created empty voxels.\n        coors (np.ndarray): Created coordinates of each voxel.\n        max_points (int): Indicate maximum points contained in a voxel.\n        max_voxels (int): Maximum number of voxels this function create.\n            for second, 20000 is a good choice. Points should be shuffled for\n            randomness before this function because max_voxels drops points.\n\n    Returns:\n        tuple[np.ndarray]:\n            voxels: Shape [M, max_points, ndim], only contain points.\n            coordinates: Shape [M, 3].\n            num_points_per_voxel: Shape [M].\n    \"\"\"\n    N = points.shape[0]\n    # ndim = points.shape[1] - 1\n    ndim = 3\n    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n\n    # lower_bound = coors_range[:3]\n    # upper_bound = coors_range[3:]\n    coor = np.zeros(shape=(3, ), dtype=np.int32)\n    voxel_num = 0\n    failed = False\n    for i in range(N):\n        failed = False\n        for j in range(ndim):\n            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n            if c < 0 or c >= grid_size[j]:\n                failed = True\n                break\n            coor[j] = c\n        if failed:\n            continue\n        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n        if voxelidx == -1:\n            voxelidx = voxel_num\n            if voxel_num >= max_voxels:\n                continue\n            voxel_num += 1\n            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n            coors[voxelidx] = coor\n        num = num_points_per_voxel[voxelidx]\n        if num < max_points:\n            voxels[voxelidx, num] = points[i]\n            num_points_per_voxel[voxelidx] += 1\n    return voxel_num\n"
  },
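A direct usage sketch of the numpy VoxelGenerator above on a synthetic cloud: points must lie inside point_cloud_range, and because generate calls points_to_voxel with reverse_index=True the returned coordinates are in zyx order.

import numpy as np

from mmdet3d.core.voxel import VoxelGenerator

generator = VoxelGenerator(
    voxel_size=[0.05, 0.05, 0.1],
    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    max_num_points=5)
points = np.random.rand(10000, 4).astype(np.float32)
points[:, 0] *= 70.4                   # x in [0, 70.4)
points[:, 1] = points[:, 1] * 80 - 40  # y in [-40, 40)
points[:, 2] = points[:, 2] * 4 - 3    # z in [-3, 1)
voxels, coors, num_points = generator.generate(points)
# voxels: [M, 5, 4] padded points, coors: [M, 3] zyx indices, num_points: [M]
print(voxels.shape, coors.shape, num_points.shape)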
  {
    "path": "mmdet3d/datasets/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# from mmdet.datasets.builder import build_dataloader, \nfrom .builder import DATASETS, PIPELINES, build_dataset, build_dataloader\nfrom .custom_3d import Custom3DDataset\nfrom .custom_3d_seg import Custom3DSegDataset\nfrom .kitti_dataset import KittiDataset\nfrom .kitti_mono_dataset import KittiMonoDataset\nfrom .lyft_dataset import LyftDataset\nfrom .nuscenes_dataset import NuScenesDataset, NuscenesOccupancy\nfrom .nuscenes_mono_dataset import NuScenesMonoDataset\n# yapf: disable\nfrom .pipelines import (AffineResize, BackgroundPointsFilter, GlobalAlignment,\n                        GlobalRotScaleTrans, IndoorPatchPointSample,\n                        IndoorPointSample, LoadAnnotations3D,\n                        LoadPointsFromDict, LoadPointsFromFile,\n                        LoadPointsFromMultiSweeps, MultiViewWrapper,\n                        NormalizePointsColor, ObjectNameFilter, ObjectNoise,\n                        ObjectRangeFilter, ObjectSample, PointSample,\n                        PointShuffle, PointsRangeFilter, RandomDropPointsColor,\n                        RandomFlip3D, RandomJitterPoints, RandomRotate,\n                        RandomShiftScale, RangeLimitedRandomCrop,\n                        VoxelBasedPointSampler)\n# yapf: enable\nfrom .s3dis_dataset import S3DISDataset, S3DISSegDataset\nfrom .scannet_dataset import (ScanNetDataset, ScanNetInstanceSegDataset,\n                              ScanNetSegDataset)\nfrom .semantickitti_dataset import SemanticKITTIDataset\nfrom .sunrgbd_dataset import SUNRGBDDataset\nfrom .utils import get_loading_pipeline\nfrom .waymo_dataset import WaymoDataset\nfrom .samplers import InfiniteGroupEachSampleInBatchSampler\n\n__all__ = [\n    'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS',\n    'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',\n    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',\n    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',\n    'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',\n    'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',\n    'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',\n    'ScanNetDataset', 'ScanNetSegDataset', 'ScanNetInstanceSegDataset',\n    'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset',\n    'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',\n    'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',\n    'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',\n    'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES',\n    'RangeLimitedRandomCrop', 'RandomRotate', 'MultiViewWrapper'\n]\n"
  },
  {
    "path": "mmdet3d/datasets/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport platform\nfrom functools import partial\n\nfrom mmcv.utils import Registry, build_from_cfg\nfrom mmcv.parallel import collate\nfrom mmcv.runner import get_dist_info\n\nfrom mmdet.datasets import DATASETS\nfrom mmdet.datasets.builder import _concat_dataset, worker_init_fn\nfrom torch.utils.data import DataLoader\n\nfrom mmdet.datasets.samplers import (DistributedGroupSampler,\n                       DistributedSampler, GroupSampler)\n\nfrom .samplers import InfiniteGroupEachSampleInBatchSampler, CustomDistributedSampler, InfiniteGroupEachSampleInBatchSamplerEval, TTADistributedSampler\n\n\nif platform.system() != 'Windows':\n    # https://github.com/pytorch/pytorch/issues/973\n    import resource\n    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)\n    base_soft_limit = rlimit[0]\n    hard_limit = rlimit[1]\n    soft_limit = min(max(4096, base_soft_limit), hard_limit)\n    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))\n\nOBJECTSAMPLERS = Registry('Object sampler')\nDATASETS = Registry('dataset')\nPIPELINES = Registry('pipeline')\n\n\ndef build_dataset(cfg, default_args=None):\n    from mmdet3d.datasets.dataset_wrappers import CBGSDataset\n    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,\n                                                 ConcatDataset, RepeatDataset)\n    if isinstance(cfg, (list, tuple)):\n        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])\n    elif cfg['type'] == 'ConcatDataset':\n        dataset = ConcatDataset(\n            [build_dataset(c, default_args) for c in cfg['datasets']],\n            cfg.get('separate_eval', True))\n    elif cfg['type'] == 'RepeatDataset':\n        dataset = RepeatDataset(\n            build_dataset(cfg['dataset'], default_args), cfg['times'])\n    elif cfg['type'] == 'ClassBalancedDataset':\n        dataset = ClassBalancedDataset(\n            build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])\n    elif cfg['type'] == 'CBGSDataset':\n        dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))\n    elif isinstance(cfg.get('ann_file'), (list, tuple)):\n        dataset = _concat_dataset(cfg, default_args)\n    elif cfg['type'] in DATASETS._module_dict.keys():\n        dataset = build_from_cfg(cfg, DATASETS, default_args)\n    else:\n        dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args)\n    return dataset\n\n\n# https://github.com/open-mmlab/mmdetection/blob/v2.14.0/mmdet/datasets/builder.py\ndef build_dataloader(dataset,\n                     samples_per_gpu,\n                     workers_per_gpu,\n                     num_gpus=1,\n                     dist=True,\n                     shuffle=True,\n                     seed=None,\n                     runner_type='EpochBasedRunner',\n                     val=False,\n                     **kwargs):\n    \"\"\"Build PyTorch DataLoader.\n    In distributed training, each GPU/process has a dataloader.\n    In non-distributed training, there is only one dataloader for all GPUs.\n    Args:\n        dataset (Dataset): A PyTorch dataset.\n        samples_per_gpu (int): Number of training samples on each GPU, i.e.,\n            batch size of each GPU.\n        workers_per_gpu (int): How many subprocesses to use for data loading\n            for each GPU.\n        num_gpus (int): Number of GPUs. Only used in non-distributed training.\n        dist (bool): Distributed training/test or not. 
Default: True.\n        shuffle (bool): Whether to shuffle the data at every epoch.\n            Default: True.\n        kwargs: any keyword argument to be used to initialize DataLoader\n    Returns:\n        DataLoader: A PyTorch dataloader.\n    \"\"\"\n    rank, world_size = get_dist_info()\n\n    if dist:\n        # When model is :obj:`DistributedDataParallel`,\n        # `batch_size` of :obj:`dataloader` is the\n        # number of training samples on each GPU.\n        batch_size = samples_per_gpu\n        num_workers = workers_per_gpu\n    else:\n        # When model is obj:`DataParallel`\n        # the batch size is samples on all the GPUS\n        batch_size = num_gpus * samples_per_gpu\n        num_workers = num_gpus * workers_per_gpu\n    if val:\n        # runner_type = 'EpochBasedRunner'\n        assert not shuffle\n    if runner_type == 'IterBasedRunner':\n        # TODO: original has more options, but I'm not using them \n        # https://github.com/open-mmlab/mmdetection/blob/3b72b12fe9b14de906d1363982b9fba05e7d47c1/mmdet/datasets/builder.py#L145-L157\n\n        batch_sampler = InfiniteGroupEachSampleInBatchSampler(\n            dataset,\n            batch_size,\n            world_size,\n            rank,\n            seed=seed)\n        batch_size = 1\n        sampler = None\n    elif runner_type == 'IterBasedRunnerEval':\n        # TODO: original has more options, but I'm not using them \n        # https://github.com/open-mmlab/mmdetection/blob/3b72b12fe9b14de906d1363982b9fba05e7d47c1/mmdet/datasets/builder.py#L145-L157\n\n        batch_sampler = InfiniteGroupEachSampleInBatchSamplerEval(\n            dataset,\n            batch_size,\n            world_size,\n            rank,\n            seed=seed)\n        batch_size = 1\n        sampler = None\n    elif runner_type == 'TTARunnerEval':\n        # TODO: original has more options, but I'm not using them \n        # https://github.com/open-mmlab/mmdetection/blob/3b72b12fe9b14de906d1363982b9fba05e7d47c1/mmdet/datasets/builder.py#L145-L157\n\n        batch_sampler = TTADistributedSampler(\n            dataset,\n            samples_per_gpu,\n            world_size,\n            rank,\n            seed=seed)\n        sampler = None   \n    else:\n        if dist:\n            # DistributedGroupSampler will definitely shuffle the data to satisfy\n            # that images on each GPU are in the same group\n            if shuffle:\n                sampler = DistributedGroupSampler(\n                    dataset, samples_per_gpu, world_size, rank, seed=seed)\n            else:\n                if val:\n                    sampler = CustomDistributedSampler(\n                        dataset, world_size, rank, shuffle=False, seed=seed)\n                else:\n                    sampler = DistributedSampler(\n                        dataset, world_size, rank, shuffle=False, seed=seed)\n        else:\n            sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None\n\n        batch_sampler = None\n\n    init_fn = partial(\n        worker_init_fn, num_workers=num_workers, rank=rank,\n        seed=seed) if seed is not None else None\n\n    data_loader = DataLoader(\n        dataset,\n        batch_size=batch_size,\n        sampler=sampler,\n        num_workers=num_workers,\n        batch_sampler=batch_sampler,\n        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),\n        pin_memory=False,\n        worker_init_fn=init_fn,\n        **kwargs)\n\n    return data_loader"
  },
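For reference, a minimal, self-contained call of the `build_dataloader` above. A stand-in `ToySet` replaces a real dataset so the snippet runs on its own (assuming mmdet3d/mmcv/torch are installed); in real training the dataset comes from `build_dataset`, `shuffle=True` is used, and the group-aware samplers then require the dataset's `flag` attribute.

```python
from torch.utils.data import Dataset
from mmdet3d.datasets import build_dataloader


class ToySet(Dataset):
    """Stand-in dataset; real usage passes the dataset built by build_dataset."""

    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return dict(idx=idx)


loader = build_dataloader(
    ToySet(),
    samples_per_gpu=2,       # per-GPU batch size
    workers_per_gpu=0,       # dataloader worker processes per GPU
    num_gpus=1,
    dist=False,              # non-distributed: batch size = num_gpus * samples_per_gpu
    shuffle=False,           # shuffle=True would use GroupSampler, which needs dataset.flag
    runner_type='EpochBasedRunner')  # default branch: plain sampler, no batch_sampler

for batch in loader:         # batches are collated with mmcv.parallel.collate
    print(batch)
```

The `IterBasedRunner*` and `TTARunnerEval` branches instead hand batching entirely to the custom batch samplers from `.samplers` and force `batch_size=1` on the `DataLoader`.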
  {
    "path": "mmdet3d/datasets/custom_3d.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\n\n# Copyright (c) OpenMMLab. All rights reserved.\nimport tempfile\nimport warnings\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom torch.utils.data import Dataset\n\nfrom ..core.bbox import get_box_type\nfrom .builder import DATASETS\nfrom .pipelines import Compose\nfrom .utils import extract_result_dict, get_loading_pipeline\nimport time\n\n@DATASETS.register_module()\nclass Custom3DDataset(Dataset):\n    \"\"\"Customized 3D dataset.\n\n    This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI\n    dataset.\n\n    .. code-block:: none\n\n    [\n        {'sample_idx':\n         'lidar_points': {'lidar_path': velodyne_path,\n                           ....\n                         },\n         'annos': {'box_type_3d':  (str)  'LiDAR/Camera/Depth'\n                   'gt_bboxes_3d':  <np.ndarray> (n, 7)\n                   'gt_names':  [list]\n                   ....\n               }\n         'calib': { .....}\n         'images': { .....}\n        }\n    ]\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR'. 
Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 file_client_args=dict(backend='disk')):\n        super().__init__()\n        self.data_root = data_root\n        self.ann_file = ann_file\n        self.test_mode = test_mode\n        self.modality = modality\n        self.filter_empty_gt = filter_empty_gt\n        self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)\n\n        self.CLASSES = self.get_classes(classes)\n        self.file_client = mmcv.FileClient(**file_client_args)\n        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}\n\n        # load annotations\n        if hasattr(self.file_client, 'get_local_path'):\n            with self.file_client.get_local_path(self.ann_file) as local_path:\n                self.data_infos = self.load_annotations(open(local_path, 'rb'))\n        else:\n            warnings.warn(\n                'The used MMCV version does not have get_local_path. '\n                f'We treat the {self.ann_file} as local paths and it '\n                'might cause errors if the path is not a local path. '\n                'Please use MMCV>= 1.3.16 if you meet errors.')\n            self.data_infos = self.load_annotations(self.ann_file)\n\n        # process pipeline\n        if pipeline is not None:\n            self.pipeline = Compose(pipeline)\n\n        # set group flag for the samplers\n        if not self.test_mode:\n            self._set_group_flag()\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        # loading data from a file-like object needs file format\n        return mmcv.load(ann_file, file_format='pkl')\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - file_name (str): Filename of point clouds.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['sample_idx']\n        pts_filename = osp.join(self.data_root,\n                                info['lidar_points']['lidar_path'])\n\n        input_dict = dict(\n            pts_filename=pts_filename,\n            sample_idx=sample_idx,\n            file_name=pts_filename)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():\n                return None\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n        \"\"\"\n        info = self.data_infos[index]\n        gt_bboxes_3d = info['annos']['gt_bboxes_3d']\n        gt_names_3d = info['annos']['gt_names']\n        gt_labels_3d = []\n        for cat in gt_names_3d:\n            if cat in self.CLASSES:\n                gt_labels_3d.append(self.CLASSES.index(cat))\n            else:\n                gt_labels_3d.append(-1)\n        gt_labels_3d = np.array(gt_labels_3d)\n\n        # Obtain original box 3d type in info file\n        ori_box_type_3d = info['annos']['box_type_3d']\n        ori_box_type_3d, _ = get_box_type(ori_box_type_3d)\n\n        # turn original box type to target box type\n        gt_bboxes_3d = ori_box_type_3d(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            gt_names=gt_names_3d)\n        return anns_results\n\n    def pre_pipeline(self, results):\n        \"\"\"Initialization before data preparation.\n\n        Args:\n            results (dict): Dict before data preprocessing.\n\n                - img_fields (list): Image fields.\n                - bbox3d_fields (list): 3D bounding boxes fields.\n                - pts_mask_fields (list): Mask fields of points.\n                - pts_seg_fields (list): Mask fields of point segments.\n                - bbox_fields (list): Fields of bounding boxes.\n                - mask_fields (list): Fields of masks.\n                - seg_fields (list): Segment fields.\n                - box_type_3d (str): 3D box type.\n                - box_mode_3d (str): 3D box mode.\n        \"\"\"\n        results['img_fields'] = []\n        results['bbox3d_fields'] = []\n        results['pts_mask_fields'] = []\n        results['pts_seg_fields'] = []\n        results['bbox_fields'] = []\n        results['mask_fields'] = []\n        results['seg_fields'] = []\n        results['box_type_3d'] = self.box_type_3d\n        results['box_mode_3d'] = self.box_mode_3d\n\n   \n    def prepare_train_data(self, 
index):\n        \"\"\"Training data preparation.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Training data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        if input_dict is None:\n            return None\n        self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        if self.filter_empty_gt and \\\n                (example is None or\n                    ~(example['gt_labels_3d']._data != -1).any()):\n            return None\n\n        return example\n\n    def prepare_test_data(self, index):\n        \"\"\"Prepare data for testing.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Testing data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        return example\n\n    @classmethod\n    def get_classes(cls, classes=None):\n        \"\"\"Get class names of current dataset.\n\n        Args:\n            classes (Sequence[str] | str): If classes is None, use\n                default CLASSES defined by builtin dataset. If classes is a\n                string, take it as a file name. The file contains the name of\n                classes where each line contains one class name. If classes is\n                a tuple or list, override the CLASSES defined by the dataset.\n\n        Return:\n            list[str]: A list of class names.\n        \"\"\"\n        if classes is None:\n            return cls.CLASSES\n\n        if isinstance(classes, str):\n            # take it as a file path\n            class_names = mmcv.list_from_file(classes)\n        elif isinstance(classes, (tuple, list)):\n            class_names = classes\n        else:\n            raise ValueError(f'Unsupported type {type(classes)} of classes.')\n\n        return class_names\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n\n        Returns:\n            tuple: (outputs, tmp_dir), outputs is the detection results,\n                tmp_dir is the temporal directory created for saving json\n                files when ``jsonfile_prefix`` is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n            out = f'{pklfile_prefix}.pkl'\n        mmcv.dump(outputs, out)\n        return outputs, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 iou_thr=(0.25, 0.5),\n                 logger=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluate.\n\n        Evaluation in indoor protocol.\n\n        Args:\n            results (list[dict]): List of results.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Defaults to None.\n            iou_thr (list[float]): AP IoU thresholds. Defaults to (0.25, 0.5).\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Defaults to None.\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict: Evaluation results.\n        \"\"\"\n        from mmdet3d.core.evaluation import indoor_eval\n        assert isinstance(\n            results, list), f'Expect results to be list, got {type(results)}.'\n        assert len(results) > 0, 'Expect length of results > 0.'\n        assert len(results) == len(self.data_infos)\n        assert isinstance(\n            results[0], dict\n        ), f'Expect elements in results to be dict, got {type(results[0])}.'\n        gt_annos = [info['annos'] for info in self.data_infos]\n        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}\n        ret_dict = indoor_eval(\n            gt_annos,\n            results,\n            iou_thr,\n            label2cat,\n            logger=logger,\n            box_type_3d=self.box_type_3d,\n            box_mode_3d=self.box_mode_3d)\n        if show:\n            self.show(results, out_dir, pipeline=pipeline)\n\n        return ret_dict\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        raise NotImplementedError('_build_default_pipeline is not implemented '\n                                  f'for dataset {self.__class__.__name__}')\n\n    def _get_pipeline(self, pipeline):\n        \"\"\"Get data loading pipeline in self.show/evaluate function.\n\n        Args:\n            pipeline (list[dict]): Input pipeline. 
If None is given,\n                get from self.pipeline.\n        \"\"\"\n        if pipeline is None:\n            if not hasattr(self, 'pipeline') or self.pipeline is None:\n                warnings.warn(\n                    'Use default pipeline for data loading, this may cause '\n                    'errors when data is on ceph')\n                return self._build_default_pipeline()\n            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)\n            return Compose(loading_pipeline)\n        return Compose(pipeline)\n\n    def _extract_data(self, index, pipeline, key, load_annos=False):\n        \"\"\"Load data using input pipeline and extract data according to key.\n\n        Args:\n            index (int): Index for accessing the target data.\n            pipeline (:obj:`Compose`): Composed data loading pipeline.\n            key (str | list[str]): One single or a list of data key.\n            load_annos (bool): Whether to load data annotations.\n                If True, need to set self.test_mode as False before loading.\n\n        Returns:\n            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:\n                A single or a list of loaded data.\n        \"\"\"\n        assert pipeline is not None, 'data loading pipeline is not provided'\n        # when we want to load ground-truth via pipeline (e.g. bbox, seg mask)\n        # we need to set self.test_mode as False so that we have 'annos'\n        if load_annos:\n            original_test_mode = self.test_mode\n            self.test_mode = False\n        input_dict = self.get_data_info(index)\n        self.pre_pipeline(input_dict)\n        example = pipeline(input_dict)\n\n        # extract data items according to keys\n        if isinstance(key, str):\n            data = extract_result_dict(example, key)\n        else:\n            data = [extract_result_dict(example, k) for k in key]\n        if load_annos:\n            self.test_mode = original_test_mode\n\n        return data\n\n    def __len__(self):\n        \"\"\"Return the length of data infos.\n\n        Returns:\n            int: Length of data infos.\n        \"\"\"\n        return len(self.data_infos)\n\n    def _rand_another(self, idx):\n        \"\"\"Randomly get another item with the same flag.\n\n        Returns:\n            int: Another index of item with the same flag.\n        \"\"\"\n        pool = np.where(self.flag == self.flag[idx])[0]\n        return np.random.choice(pool)\n\n    def __getitem__(self, idx):\n        \"\"\"Get item from infos according to the given index.\n\n        Returns:\n            dict: Data dictionary of the corresponding index.\n        \"\"\"\n        if self.test_mode:\n            return self.prepare_test_data(idx)\n        while True:\n            data = self.prepare_train_data(idx)\n            if data is None:\n                idx = self._rand_another(idx)\n                continue\n            return data\n\n    def _set_group_flag(self):\n        \"\"\"Set flag according to image aspect ratio.\n\n        Images with aspect ratio greater than 1 will be set as group 1,\n        otherwise group 0. In 3D datasets, they are all the same, thus are all\n        zeros.\n        \"\"\"\n        self.flag = np.zeros(len(self), dtype=np.uint8)\n    \n"
  },
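The train-time `__getitem__` above never returns `None`: when a sample is filtered out (e.g. no valid ground truth after the pipeline), it resamples another index with the same group flag via `_rand_another` and tries again. A small standalone sketch of that retry pattern (names and the toy filter are illustrative):

```python
import numpy as np


def getitem_with_retry(prepare_fn, flag, idx, rng=np.random):
    """Return the first usable sample, resampling inside the same flag group."""
    while True:
        data = prepare_fn(idx)                   # may return None (filtered out)
        if data is not None:
            return data
        pool = np.where(flag == flag[idx])[0]    # indices sharing the group flag
        idx = rng.choice(pool)                   # the `_rand_another` step


# Toy usage: pretend samples at even indices have no valid GT boxes.
flag = np.zeros(10, dtype=np.uint8)
print(getitem_with_retry(lambda i: {'idx': int(i)} if i % 2 else None, flag, idx=4))
```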
  {
    "path": "mmdet3d/datasets/custom_3d_seg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport tempfile\nimport warnings\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom torch.utils.data import Dataset\n\nfrom mmseg.datasets import DATASETS as SEG_DATASETS\nfrom .builder import DATASETS\nfrom .pipelines import Compose\nfrom .utils import extract_result_dict, get_loading_pipeline\n\n\n@DATASETS.register_module()\n@SEG_DATASETS.register_module()\nclass Custom3DSegDataset(Dataset):\n    \"\"\"Customized 3D dataset for semantic segmentation task.\n\n    This is the base dataset of ScanNet and S3DIS dataset.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        palette (list[list[int]], optional): The palette of segmentation map.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        ignore_index (int, optional): The label index to be ignored, e.g.\n            unannotated points. If None is given, set to len(self.CLASSES) to\n            be consistent with PointSegClassMapping function in pipeline.\n            Defaults to None.\n        scene_idxs (np.ndarray | str, optional): Precomputed index to load\n            data. For scenes with many points, we may sample it several times.\n            Defaults to None.\n    \"\"\"\n    # names of all classes data used for the task\n    CLASSES = None\n\n    # class_ids used for training\n    VALID_CLASS_IDS = None\n\n    # all possible class_ids in loaded segmentation mask\n    ALL_CLASS_IDS = None\n\n    # official color for visualization\n    PALETTE = None\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 palette=None,\n                 modality=None,\n                 test_mode=False,\n                 ignore_index=None,\n                 scene_idxs=None,\n                 file_client_args=dict(backend='disk')):\n        super().__init__()\n        self.data_root = data_root\n        self.ann_file = ann_file\n        self.test_mode = test_mode\n        self.modality = modality\n        self.file_client = mmcv.FileClient(**file_client_args)\n\n        # load annotations\n        if hasattr(self.file_client, 'get_local_path'):\n            with self.file_client.get_local_path(self.ann_file) as local_path:\n                self.data_infos = self.load_annotations(open(local_path, 'rb'))\n        else:\n            warnings.warn(\n                'The used MMCV version does not have get_local_path. '\n                f'We treat the {self.ann_file} as local paths and it '\n                'might cause errors if the path is not a local path. 
'\n                'Please use MMCV>= 1.3.16 if you meet errors.')\n            self.data_infos = self.load_annotations(self.ann_file)\n\n        if pipeline is not None:\n            self.pipeline = Compose(pipeline)\n\n        self.ignore_index = len(self.CLASSES) if \\\n            ignore_index is None else ignore_index\n\n        self.scene_idxs = self.get_scene_idxs(scene_idxs)\n        self.CLASSES, self.PALETTE = \\\n            self.get_classes_and_palette(classes, palette)\n\n        # set group flag for the sampler\n        if not self.test_mode:\n            self._set_group_flag()\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        # loading data from a file-like object needs file format\n        return mmcv.load(ann_file, file_format='pkl')\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - file_name (str): Filename of point clouds.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['point_cloud']['lidar_idx']\n        pts_filename = osp.join(self.data_root, info['pts_path'])\n\n        input_dict = dict(\n            pts_filename=pts_filename,\n            sample_idx=sample_idx,\n            file_name=pts_filename)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n        return input_dict\n\n    def pre_pipeline(self, results):\n        \"\"\"Initialization before data preparation.\n\n        Args:\n            results (dict): Dict before data preprocessing.\n\n                - img_fields (list): Image fields.\n                - pts_mask_fields (list): Mask fields of points.\n                - pts_seg_fields (list): Mask fields of point segments.\n                - mask_fields (list): Fields of masks.\n                - seg_fields (list): Segment fields.\n        \"\"\"\n        results['img_fields'] = []\n        results['pts_mask_fields'] = []\n        results['pts_seg_fields'] = []\n        results['mask_fields'] = []\n        results['seg_fields'] = []\n        results['bbox3d_fields'] = []\n\n    def prepare_train_data(self, index):\n        \"\"\"Training data preparation.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Training data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        if input_dict is None:\n            return None\n        self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        return example\n\n    def prepare_test_data(self, index):\n        \"\"\"Prepare data for testing.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Testing data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        
self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        return example\n\n    def get_classes_and_palette(self, classes=None, palette=None):\n        \"\"\"Get class names of current dataset.\n\n        This function is taken from MMSegmentation.\n\n        Args:\n            classes (Sequence[str] | str): If classes is None, use\n                default CLASSES defined by builtin dataset. If classes is a\n                string, take it as a file name. The file contains the name of\n                classes where each line contains one class name. If classes is\n                a tuple or list, override the CLASSES defined by the dataset.\n                Defaults to None.\n            palette (Sequence[Sequence[int]]] | np.ndarray):\n                The palette of segmentation map. If None is given, random\n                palette will be generated. Defaults to None.\n        \"\"\"\n        if classes is None:\n            self.custom_classes = False\n            # map id in the loaded mask to label used for training\n            self.label_map = {\n                cls_id: self.ignore_index\n                for cls_id in self.ALL_CLASS_IDS\n            }\n            self.label_map.update(\n                {cls_id: i\n                 for i, cls_id in enumerate(self.VALID_CLASS_IDS)})\n            # map label to category name\n            self.label2cat = {\n                i: cat_name\n                for i, cat_name in enumerate(self.CLASSES)\n            }\n            return self.CLASSES, self.PALETTE\n\n        self.custom_classes = True\n        if isinstance(classes, str):\n            # take it as a file path\n            class_names = mmcv.list_from_file(classes)\n        elif isinstance(classes, (tuple, list)):\n            class_names = classes\n        else:\n            raise ValueError(f'Unsupported type {type(classes)} of classes.')\n\n        if self.CLASSES:\n            if not set(class_names).issubset(self.CLASSES):\n                raise ValueError('classes is not a subset of CLASSES.')\n\n            # update valid_class_ids\n            self.VALID_CLASS_IDS = [\n                self.VALID_CLASS_IDS[self.CLASSES.index(cls_name)]\n                for cls_name in class_names\n            ]\n\n            # dictionary, its keys are the old label ids and its values\n            # are the new label ids.\n            # used for changing pixel labels in load_annotations.\n            self.label_map = {\n                cls_id: self.ignore_index\n                for cls_id in self.ALL_CLASS_IDS\n            }\n            self.label_map.update(\n                {cls_id: i\n                 for i, cls_id in enumerate(self.VALID_CLASS_IDS)})\n            self.label2cat = {\n                i: cat_name\n                for i, cat_name in enumerate(class_names)\n            }\n\n        # modify palette for visualization\n        palette = [\n            self.PALETTE[self.CLASSES.index(cls_name)]\n            for cls_name in class_names\n        ]\n\n        return class_names, palette\n\n    def get_scene_idxs(self, scene_idxs):\n        \"\"\"Compute scene_idxs for data sampling.\n\n        We sample more times for scenes with more points.\n        \"\"\"\n        if self.test_mode:\n            # when testing, we load one whole scene every time\n            return np.arange(len(self.data_infos)).astype(np.int32)\n\n        # we may need to re-sample different scenes according to scene_idxs\n        # this is necessary for indoor scene segmentation 
such as ScanNet\n        if scene_idxs is None:\n            scene_idxs = np.arange(len(self.data_infos))\n        if isinstance(scene_idxs, str):\n            with self.file_client.get_local_path(scene_idxs) as local_path:\n                scene_idxs = np.load(local_path)\n        else:\n            scene_idxs = np.array(scene_idxs)\n\n        return scene_idxs.astype(np.int32)\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n\n        Returns:\n            tuple: (outputs, tmp_dir), outputs is the detection results,\n                tmp_dir is the temporal directory created for saving json\n                files when ``jsonfile_prefix`` is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n            out = f'{pklfile_prefix}.pkl'\n        mmcv.dump(outputs, out)\n        return outputs, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 logger=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluate.\n\n        Evaluation in semantic segmentation protocol.\n\n        Args:\n            results (list[dict]): List of results.\n            metric (str | list[str]): Metrics to be evaluated.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. 
Defaults to None.\n            show (bool, optional): Whether to visualize.\n                Defaults to False.\n            out_dir (str, optional): Path to save the visualization results.\n                Defaults to None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict: Evaluation results.\n        \"\"\"\n        from mmdet3d.core.evaluation import seg_eval\n        assert isinstance(\n            results, list), f'Expect results to be list, got {type(results)}.'\n        assert len(results) > 0, 'Expect length of results > 0.'\n        assert len(results) == len(self.data_infos)\n        assert isinstance(\n            results[0], dict\n        ), f'Expect elements in results to be dict, got {type(results[0])}.'\n\n        load_pipeline = self._get_pipeline(pipeline)\n        pred_sem_masks = [result['semantic_mask'] for result in results]\n        gt_sem_masks = [\n            self._extract_data(\n                i, load_pipeline, 'pts_semantic_mask', load_annos=True)\n            for i in range(len(self.data_infos))\n        ]\n        ret_dict = seg_eval(\n            gt_sem_masks,\n            pred_sem_masks,\n            self.label2cat,\n            self.ignore_index,\n            logger=logger)\n\n        if show:\n            self.show(pred_sem_masks, out_dir, pipeline=pipeline)\n\n        return ret_dict\n\n    def _rand_another(self, idx):\n        \"\"\"Randomly get another item with the same flag.\n\n        Returns:\n            int: Another index of item with the same flag.\n        \"\"\"\n        pool = np.where(self.flag == self.flag[idx])[0]\n        return np.random.choice(pool)\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        raise NotImplementedError('_build_default_pipeline is not implemented '\n                                  f'for dataset {self.__class__.__name__}')\n\n    def _get_pipeline(self, pipeline):\n        \"\"\"Get data loading pipeline in self.show/evaluate function.\n\n        Args:\n            pipeline (list[dict]): Input pipeline. If None is given,\n                get from self.pipeline.\n        \"\"\"\n        if pipeline is None:\n            if not hasattr(self, 'pipeline') or self.pipeline is None:\n                warnings.warn(\n                    'Use default pipeline for data loading, this may cause '\n                    'errors when data is on ceph')\n                return self._build_default_pipeline()\n            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)\n            return Compose(loading_pipeline)\n        return Compose(pipeline)\n\n    def _extract_data(self, index, pipeline, key, load_annos=False):\n        \"\"\"Load data using input pipeline and extract data according to key.\n\n        Args:\n            index (int): Index for accessing the target data.\n            pipeline (:obj:`Compose`): Composed data loading pipeline.\n            key (str | list[str]): One single or a list of data key.\n            load_annos (bool): Whether to load data annotations.\n                If True, need to set self.test_mode as False before loading.\n\n        Returns:\n            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:\n                A single or a list of loaded data.\n        \"\"\"\n        assert pipeline is not None, 'data loading pipeline is not provided'\n        # when we want to load ground-truth via pipeline (e.g. 
bbox, seg mask)\n        # we need to set self.test_mode as False so that we have 'annos'\n        if load_annos:\n            original_test_mode = self.test_mode\n            self.test_mode = False\n        input_dict = self.get_data_info(index)\n        self.pre_pipeline(input_dict)\n        example = pipeline(input_dict)\n\n        # extract data items according to keys\n        if isinstance(key, str):\n            data = extract_result_dict(example, key)\n        else:\n            data = [extract_result_dict(example, k) for k in key]\n        if load_annos:\n            self.test_mode = original_test_mode\n\n        return data\n\n    def __len__(self):\n        \"\"\"Return the length of scene_idxs.\n\n        Returns:\n            int: Length of data infos.\n        \"\"\"\n        return len(self.scene_idxs)\n\n    def __getitem__(self, idx):\n        \"\"\"Get item from infos according to the given index.\n\n        In indoor scene segmentation task, each scene contains millions of\n        points. However, we only sample less than 10k points within a patch\n        each time. Therefore, we use `scene_idxs` to re-sample different rooms.\n\n        Returns:\n            dict: Data dictionary of the corresponding index.\n        \"\"\"\n        scene_idx = self.scene_idxs[idx]  # map to scene idx\n        if self.test_mode:\n            return self.prepare_test_data(scene_idx)\n        while True:\n            data = self.prepare_train_data(scene_idx)\n            if data is None:\n                idx = self._rand_another(idx)\n                scene_idx = self.scene_idxs[idx]  # map to scene idx\n                continue\n            return data\n\n    def _set_group_flag(self):\n        \"\"\"Set flag according to image aspect ratio.\n\n        Images with aspect ratio greater than 1 will be set as group 1,\n        otherwise group 0. In 3D datasets, they are all the same, thus are all\n        zeros.\n        \"\"\"\n        self.flag = np.zeros(len(self), dtype=np.uint8)\n"
  },
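`get_classes_and_palette` above builds a `label_map` that sends every raw segmentation id to `ignore_index` by default and remaps the valid ids to a contiguous 0..K-1 range used for training. A standalone illustration with made-up id sets (the real `ALL_CLASS_IDS` / `VALID_CLASS_IDS` come from the concrete dataset class, e.g. ScanNet):

```python
import numpy as np

ALL_CLASS_IDS = list(range(41))       # every id that may appear in a raw mask
VALID_CLASS_IDS = [1, 2, 3, 4, 5]     # ids actually used for training
ignore_index = len(VALID_CLASS_IDS)   # mirrors len(self.CLASSES) in the dataset

label_map = {cls_id: ignore_index for cls_id in ALL_CLASS_IDS}
label_map.update({cls_id: i for i, cls_id in enumerate(VALID_CLASS_IDS)})

raw_mask = np.array([0, 1, 4, 17, 3])
train_mask = np.vectorize(label_map.get)(raw_mask)
print(train_mask)                     # -> [5 0 3 5 2]
```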
  {
    "path": "mmdet3d/datasets/dataset_wrappers.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\n\nfrom .builder import DATASETS\n\n\n@DATASETS.register_module()\nclass CBGSDataset(object):\n    \"\"\"A wrapper of class sampled dataset with ann_file path. Implementation of\n    paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object\n    Detection <https://arxiv.org/abs/1908.09492.>`_.\n\n    Balance the number of scenes under different classes.\n\n    Args:\n        dataset (:obj:`CustomDataset`): The dataset to be class sampled.\n    \"\"\"\n\n    def __init__(self, dataset):\n        self.dataset = dataset\n        self.CLASSES = dataset.CLASSES\n        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}\n        self.sample_indices = self._get_sample_indices()\n        # self.dataset.data_infos = self.data_infos\n        if hasattr(self.dataset, 'flag'):\n            self.flag = np.array(\n                [self.dataset.flag[ind] for ind in self.sample_indices],\n                dtype=np.uint8)\n\n    def _get_sample_indices(self):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations after class sampling.\n        \"\"\"\n\n        class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()}\n        for idx in range(len(self.dataset)):\n            sample_cat_ids = self.dataset.get_cat_ids(idx)\n            for cat_id in sample_cat_ids:\n                class_sample_idxs[cat_id].append(idx)\n        duplicated_samples = sum(\n            [len(v) for _, v in class_sample_idxs.items()])\n        class_distribution = {\n            k: len(v) / duplicated_samples\n            for k, v in class_sample_idxs.items()\n        }\n\n        sample_indices = []\n\n        frac = 1.0 / len(self.CLASSES)\n        ratios = [frac / v for v in class_distribution.values()]\n\n        for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios):\n            sample_indices += np.random.choice(cls_inds,\n                                               int(len(cls_inds) *\n                                                   ratio)).tolist()\n        return sample_indices\n    \n\n    def __getitem__(self, idx):\n        \"\"\"Get item from infos according to the given index.\n\n        Returns:\n            dict: Data dictionary of the corresponding index.\n        \"\"\"\n        ori_idx = self.sample_indices[idx]\n        return self.dataset[ori_idx]\n\n    def __len__(self):\n        \"\"\"Return the length of data infos.\n\n        Returns:\n            int: Length of data infos.\n        \"\"\"\n        return len(self.sample_indices)\n"
  },
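`CBGSDataset._get_sample_indices` above balances classes by oversampling: each class's sample indices are resampled with ratio `frac / class_frequency`, so every class ends up contributing roughly `duplicated_samples / num_classes` entries. A standalone numeric sketch of that ratio computation (class counts are made up):

```python
import numpy as np

# Per-class sample indices (sizes are illustrative; class 2 is rare).
class_sample_idxs = {0: list(range(800)), 1: list(range(150)), 2: list(range(50))}

duplicated_samples = sum(len(v) for v in class_sample_idxs.values())
frac = 1.0 / len(class_sample_idxs)

sample_indices = []
for cls_inds in class_sample_idxs.values():
    class_ratio = len(cls_inds) / duplicated_samples    # class frequency
    ratio = frac / class_ratio                          # oversample rare classes
    sample_indices += np.random.choice(cls_inds, int(len(cls_inds) * ratio)).tolist()

# Each class now contributes roughly duplicated_samples / num_classes entries.
print(len(sample_indices))   # ~999 for these numbers
```

Sampling is done with replacement, so rare-class frames are simply repeated; the wrapper then exposes the resampled index list through `__getitem__` and `__len__`.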
  {
    "path": "mmdet3d/datasets/evals/eval_utils.py",
    "content": "import json\nimport torch\nimport tqdm\nfrom typing import List, Dict, Tuple, Callable, Union\nfrom nuscenes import NuScenes\nfrom pyquaternion import Quaternion\nimport numpy as np\nfrom .metric_utils import min_ade, min_fde, miss_rate\n\nfrom nuscenes.utils.splits import create_splits_scenes\nfrom nuscenes.eval.detection.utils import category_to_detection_name\nfrom nuscenes.prediction import PredictHelper, convert_local_coords_to_global\nfrom nuscenes.eval.common.data_classes import EvalBox, EvalBoxes\nfrom nuscenes.eval.detection.data_classes import DetectionBox\nfrom nuscenes.eval.detection.data_classes import DetectionMetricData, DetectionMetricDataList, DetectionMetrics\nfrom nuscenes.eval.common.utils import center_distance, scale_iou, yaw_diff, velocity_l2, attr_acc, cummean\n\ndef category_to_motion_name(category_name: str):\n    \"\"\"\n    Default label mapping from nuScenes to nuScenes detection classes.\n    Note that pedestrian does not include personal_mobility, stroller and wheelchair.\n    :param category_name: Generic nuScenes class.\n    :return: nuScenes detection class.\n    \"\"\"\n    detection_mapping = {\n        'movable_object.barrier': 'barrier',\n        'vehicle.bicycle': 'car',\n        'vehicle.bus.bendy': 'car',\n        'vehicle.bus.rigid': 'car',\n        'vehicle.car': 'car',\n        'vehicle.construction': 'car',\n        'vehicle.motorcycle': 'car',\n        'human.pedestrian.adult': 'pedestrian',\n        'human.pedestrian.child': 'pedestrian',\n        'human.pedestrian.construction_worker': 'pedestrian',\n        'human.pedestrian.police_officer': 'pedestrian',\n        'movable_object.trafficcone': 'barrier',\n        'vehicle.trailer': 'car',\n        'vehicle.truck': 'car'\n    }\n\n    if category_name in detection_mapping:\n        return detection_mapping[category_name]\n    else:\n        return None\n\ndef detection_prediction_category_to_motion_name(category_name: str):\n    \"\"\"\n    Default label mapping from nuScenes to nuScenes detection classes.\n    Note that pedestrian does not include personal_mobility, stroller and wheelchair.\n    :param category_name: Generic nuScenes class.\n    :return: nuScenes detection class.\n    \"\"\"\n    detection_mapping = {\n        'car': 'car',\n        'truck': 'car',\n        'construction_vehicle': 'car',\n        'bus': 'car',\n        'trailer': 'car',\n        'motorcycle': 'car',\n        'bicycle': 'car',\n        'pedestrian': 'pedestrian',\n        'traffic_cone': 'barrier',\n        'barrier': 'barrier',\n    }\n\n    if category_name in detection_mapping:\n        return detection_mapping[category_name]\n    else:\n        return None\n\nclass DetectionMotionMetrics(DetectionMetrics):\n    \"\"\" Stores average precision and true positive metric results. Provides properties to summarize. \"\"\"\n\n    @classmethod\n    def deserialize(cls, content: dict):\n        \"\"\" Initialize from serialized dictionary. 
\"\"\"\n\n        cfg = DetectionConfig.deserialize(content['cfg'])\n        metrics = cls(cfg=cfg)\n        metrics.add_runtime(content['eval_time'])\n\n        for detection_name, label_aps in content['label_aps'].items():\n            for dist_th, ap in label_aps.items():\n                metrics.add_label_ap(detection_name=detection_name, dist_th=float(dist_th), ap=float(ap))\n\n        for detection_name, label_tps in content['label_tp_errors'].items():\n            for metric_name, tp in label_tps.items():\n                metrics.add_label_tp(detection_name=detection_name, metric_name=metric_name, tp=float(tp))\n\n        return metrics\n\nclass DetectionMotionMetricDataList(DetectionMetricDataList):\n    \"\"\" This stores a set of MetricData in a dict indexed by (name, match-distance). \"\"\"\n    @classmethod\n    def deserialize(cls, content: dict):\n        mdl = cls()\n        for key, md in content.items():\n            name, distance = key.split(':')\n            mdl.set(name, float(distance), DetectionMotionMetricData.deserialize(md))\n        return mdl\n\nclass DetectionMotionMetricData(DetectionMetricData):\n    \"\"\" This class holds accumulated and interpolated data required to calculate the detection metrics. \"\"\"\n\n    nelem = 101\n\n    def __init__(self,\n                 recall: np.array,\n                 precision: np.array,\n                 confidence: np.array,\n                 trans_err: np.array,\n                 vel_err: np.array,\n                 scale_err: np.array,\n                 orient_err: np.array,\n                 attr_err: np.array,\n                 min_ade_err: np.array,\n                 min_fde_err: np.array,\n                 miss_rate_err: np.array):\n\n        # Assert lengths.\n        assert len(recall) == self.nelem\n        assert len(precision) == self.nelem\n        assert len(confidence) == self.nelem\n        assert len(trans_err) == self.nelem\n        assert len(vel_err) == self.nelem\n        assert len(scale_err) == self.nelem\n        assert len(orient_err) == self.nelem\n        assert len(attr_err) == self.nelem\n        assert len(min_ade_err) == self.nelem\n        assert len(min_fde_err) == self.nelem\n        assert len(miss_rate_err) == self.nelem\n\n        # Assert ordering.\n        assert all(confidence == sorted(confidence, reverse=True))  # Confidences should be descending.\n        assert all(recall == sorted(recall))  # Recalls should be ascending.\n\n        # Set attributes explicitly to help IDEs figure out what is going on.\n        self.recall = recall\n        self.precision = precision\n        self.confidence = confidence\n        self.trans_err = trans_err\n        self.vel_err = vel_err\n        self.scale_err = scale_err\n        self.orient_err = orient_err\n        self.attr_err = attr_err\n        self.min_ade_err = min_ade_err\n        self.min_fde_err = min_fde_err\n        self.miss_rate_err = miss_rate_err\n\n    def __eq__(self, other):\n        eq = True\n        for key in self.serialize().keys():\n            eq = eq and np.array_equal(getattr(self, key), getattr(other, key))\n        return eq\n\n    @property\n    def max_recall_ind(self):\n        \"\"\" Returns index of max recall achieved. 
\"\"\"\n\n        # Last instance of confidence > 0 is index of max achieved recall.\n        non_zero = np.nonzero(self.confidence)[0]\n        if len(non_zero) == 0:  # If there are no matches, all the confidence values will be zero.\n            max_recall_ind = 0\n        else:\n            max_recall_ind = non_zero[-1]\n\n        return max_recall_ind\n\n    @property\n    def max_recall(self):\n        \"\"\" Returns max recall achieved. \"\"\"\n\n        return self.recall[self.max_recall_ind]\n\n    def serialize(self):\n        \"\"\" Serialize instance into json-friendly format. \"\"\"\n        return {\n            'recall': self.recall.tolist(),\n            'precision': self.precision.tolist(),\n            'confidence': self.confidence.tolist(),\n            'trans_err': self.trans_err.tolist(),\n            'vel_err': self.vel_err.tolist(),\n            'scale_err': self.scale_err.tolist(),\n            'orient_err': self.orient_err.tolist(),\n            'attr_err': self.attr_err.tolist(),\n            'min_ade_err': self.min_ade_err.tolist(),\n            'min_fde_err': self.min_fde_err.tolist(),\n            'miss_rate_err': self.miss_rate_err.tolist(),\n        }\n\n    @classmethod\n    def deserialize(cls, content: dict):\n        \"\"\" Initialize from serialized content. \"\"\"\n        return cls(recall=np.array(content['recall']),\n                   precision=np.array(content['precision']),\n                   confidence=np.array(content['confidence']),\n                   trans_err=np.array(content['trans_err']),\n                   vel_err=np.array(content['vel_err']),\n                   scale_err=np.array(content['scale_err']),\n                   orient_err=np.array(content['orient_err']),\n                   attr_err=np.array(content['attr_err']),\n                   min_ade_err=np.array(content['min_ade_err']),\n                   min_fde_err=np.array(content['min_fde_err']),\n                   miss_rate_err=np.array(content['miss_rate_err']))\n\n    @classmethod\n    def no_predictions(cls):\n        \"\"\" Returns a md instance corresponding to having no predictions. \"\"\"\n        return cls(recall=np.linspace(0, 1, cls.nelem),\n                   precision=np.zeros(cls.nelem),\n                   confidence=np.zeros(cls.nelem),\n                   trans_err=np.ones(cls.nelem),\n                   vel_err=np.ones(cls.nelem),\n                   scale_err=np.ones(cls.nelem),\n                   orient_err=np.ones(cls.nelem),\n                   attr_err=np.ones(cls.nelem),\n                   min_ade_err=np.ones(cls.nelem),\n                   min_fde_err=np.ones(cls.nelem),\n                   miss_rate_err=np.ones(cls.nelem))\n\n    @classmethod\n    def random_md(cls):\n        \"\"\" Returns an md instance corresponding to a random results. 
\"\"\"\n        return cls(recall=np.linspace(0, 1, cls.nelem),\n                   precision=np.random.random(cls.nelem),\n                   confidence=np.linspace(0, 1, cls.nelem)[::-1],\n                   trans_err=np.random.random(cls.nelem),\n                   vel_err=np.random.random(cls.nelem),\n                   scale_err=np.random.random(cls.nelem),\n                   orient_err=np.random.random(cls.nelem),\n                   attr_err=np.random.random(cls.nelem),\n                   min_ade_err=np.random.random(cls.nelem),\n                   min_fde_err=np.random.random(cls.nelem),\n                   miss_rate_err=np.random.random(cls.nelem))\n\n\nclass DetectionMotionBox(DetectionBox):\n    def __init__(self,\n                 sample_token: str = \"\",\n                 translation: Tuple[float, float, float] = (0, 0, 0),\n                 size: Tuple[float, float, float] = (0, 0, 0),\n                 rotation: Tuple[float, float, float, float] = (0, 0, 0, 0),\n                 velocity: Tuple[float, float] = (0, 0),\n                 ego_translation: [float, float, float] = (0, 0, 0),  # Translation to ego vehicle in meters.\n                 num_pts: int = -1,  # Nbr. LIDAR or RADAR inside the box. Only for gt boxes.\n                 detection_name: str = 'car',  # The class name used in the detection challenge.\n                 detection_score: float = -1.0,  # GT samples do not have a score.\n                 tracking_id = -1,\n                 attribute_name: str = '',\n                 traj=None,\n                 traj_scores=None):  # Box attribute. Each box can have at most 1 attribute.\n        super(DetectionBox, self).__init__(sample_token, translation, size, rotation, velocity, ego_translation, num_pts)\n        assert detection_name is not None, 'Error: detection_name cannot be empty!'\n        # assert detection_name in DETECTION_NAMES, 'Error: Unknown detection_name %s' % detection_name\n\n        # assert attribute_name in ATTRIBUTE_NAMES or attribute_name == '', \\\n        #     'Error: Unknown attribute_name %s' % attribute_name\n\n        assert type(detection_score) == float, 'Error: detection_score must be a float!'\n        assert not np.any(np.isnan(detection_score)), 'Error: detection_score may not be NaN!'\n\n        # Assign.\n        self.detection_name = detection_name\n        self.attribute_name = attribute_name\n        self.detection_score = detection_score\n        self.traj = traj\n        self.traj_scores = traj_scores\n        self.traj_index = None\n\n    def __eq__(self, other):\n        return (self.sample_token == other.sample_token and\n                self.translation == other.translation and\n                self.size == other.size and\n                self.rotation == other.rotation and\n                self.velocity == other.velocity and\n                self.ego_translation == other.ego_translation and\n                self.num_pts == other.num_pts and\n                self.detection_name == other.detection_name and\n                self.detection_score == other.detection_score and\n                self.attribute_name == other.attribute_name and \n                np.all(self.traj == other.traj) and\n                np.all(self.traj_scores == other.traj_scores))\n\n    def serialize(self) -> dict:\n        \"\"\" Serialize instance into json-friendly format. 
\"\"\"\n        return {\n            'sample_token': self.sample_token,\n            'translation': self.translation,\n            'size': self.size,\n            'rotation': self.rotation,\n            'velocity': self.velocity,\n            'ego_translation': self.ego_translation,\n            'num_pts': self.num_pts,\n            'detection_name': self.detection_name,\n            'detection_score': self.detection_score,\n            'attribute_name': self.attribute_name,\n            'traj': self.traj,\n            'traj_scores': self.traj_scores\n        }\n\n    @classmethod\n    def deserialize(cls, content: dict):\n        \"\"\" Initialize from serialized content. \"\"\"\n        return cls(sample_token=content['sample_token'],\n                   translation=tuple(content['translation']),\n                   size=tuple(content['size']),\n                   rotation=tuple(content['rotation']),\n                   velocity=tuple(content['velocity']),\n                   ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content\n                   else tuple(content['ego_translation']),\n                   num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),\n                   detection_name=content['detection_name'],\n                   detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),\n                   attribute_name=content['attribute_name'], \n                   traj=content['traj'],\n                   traj_scores=content['traj_scores'])\n\nclass DetectionMotionBox_modified(DetectionMotionBox):\n    def __init__(self, *args, token=None, visibility=None, index=None, **kwargs):\n        '''\n        add annotation token\n        '''\n        super().__init__(*args, **kwargs)\n        self.token = token\n        self.visibility = visibility\n        self.index = index\n\n    def serialize(self) -> dict:\n        \"\"\" Serialize instance into json-friendly format. \"\"\"\n        return {\n            'token': self.token,\n            'sample_token': self.sample_token,\n            'translation': self.translation,\n            'size': self.size,\n            'rotation': self.rotation,\n            'velocity': self.velocity,\n            'ego_translation': self.ego_translation,\n            'num_pts': self.num_pts,\n            'detection_name': self.detection_name,\n            'detection_score': self.detection_score,\n            'attribute_name': self.attribute_name,\n            'visibility': self.visibility,\n            'index': self.index,\n            'traj': self.traj,\n            'traj_scores': self.traj_scores\n        }\n\n    @classmethod\n    def deserialize(cls, content: dict):\n        \"\"\" Initialize from serialized content. 
\"\"\"\n        return cls(\n            token=content['token'],\n            sample_token=content['sample_token'],\n            translation=tuple(content['translation']),\n            size=tuple(content['size']),\n            rotation=tuple(content['rotation']),\n            velocity=tuple(content['velocity']),\n            ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content\n            else tuple(content['ego_translation']),\n            num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),\n            detection_name=content['detection_name'],\n            detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),\n            attribute_name=content['attribute_name'],\n            visibility=content['visibility'],\n            index=content['index'],\n            traj=content['traj'],\n        )\n\n\ndef load_prediction(result_path: str, max_boxes_per_sample: int, box_cls, verbose: bool = False, category_convert_type='detection_category') \\\n        -> Tuple[EvalBoxes, Dict]:\n    \"\"\"\n    Loads object predictions from file.\n    :param result_path: Path to the .json result file provided by the user.\n    :param max_boxes_per_sample: Maximim number of boxes allowed per sample.\n    :param box_cls: Type of box to load, e.g. DetectionBox, DetectionMotionBox or TrackingBox.\n    :param verbose: Whether to print messages to stdout.\n    :return: The deserialized results and meta data.\n    \"\"\"\n\n    # Load from file and check that the format is correct.\n    with open(result_path) as f:\n        data = json.load(f)\n    assert 'results' in data, 'Error: No field `results` in result file. Please note that the result format changed.' \\\n                              'See https://www.nuscenes.org/object-detection for more information.'\n\n    if category_convert_type == 'motion_category':\n        for key in data['results'].keys():\n            for i in range(len(data['results'][key])):\n                data['results'][key][i]['detection_name'] = detection_prediction_category_to_motion_name(data['results'][key][i]['detection_name']) \n    # Deserialize results and get meta data.\n    all_results = EvalBoxes.deserialize(data['results'], box_cls)\n    meta = data['meta']\n    if verbose:\n        print(\"Loaded results from {}. Found detections for {} samples.\"\n              .format(result_path, len(all_results.sample_tokens)))\n\n    # Check that each sample has no more than x predicted boxes.\n    for sample_token in all_results.sample_tokens:\n        assert len(all_results.boxes[sample_token]) <= max_boxes_per_sample, \\\n            \"Error: Only <= %d boxes per sample allowed!\" % max_boxes_per_sample\n\n    return all_results, meta\n\ndef load_gt(nusc: NuScenes, eval_split: str, box_cls, data_infos = None, verbose: bool = False, category_convert_type='detection_category'):\n    \"\"\"\n    Loads ground truth boxes from DB.\n    :param nusc: A NuScenes instance.\n    :param eval_split: The evaluation split for which we load GT boxes.\n    :param box_cls: Type of box to load, e.g. 
DetectionBox or TrackingBox.\n    :param verbose: Whether to print messages to stdout.\n    :return: The GT boxes.\n    \"\"\"\n    predict_helper = PredictHelper(nusc)\n    # Init.\n    if box_cls == DetectionMotionBox_modified:\n        attribute_map = {a['token']: a['name'] for a in nusc.attribute}\n\n    if verbose:\n        print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))\n    # Read out all sample_tokens in DB.\n    sample_tokens_all = [s['token'] for s in nusc.sample]\n    assert len(sample_tokens_all) > 0, \"Error: Database has no samples!\"\n\n    # Only keep samples from this split.\n    splits = create_splits_scenes()\n\n    # Check compatibility of split with nusc_version.\n    version = nusc.version\n    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:\n        assert version.endswith('trainval'), \\\n            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)\n    elif eval_split in {'mini_train', 'mini_val'}:\n        assert version.endswith('mini'), \\\n            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)\n    elif eval_split == 'test':\n        assert version.endswith('test'), \\\n            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)\n    else:\n        raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'\n                         .format(eval_split))\n\n    if eval_split == 'test':\n        # Check that you aren't trying to cheat :).\n        assert len(nusc.sample_annotation) > 0, \\\n            'Error: You are trying to evaluate on the test set but you do not have the annotations!'\n    index_map = {}\n    for scene in nusc.scene:\n        first_sample_token = scene['first_sample_token']\n        sample = nusc.get('sample', first_sample_token)\n        index_map[first_sample_token] = 1\n        index = 2\n        while sample['next'] != '':\n            sample = nusc.get('sample', sample['next'])\n            index_map[sample['token']] = index\n            index += 1\n\n    sample_tokens = []\n    for sample_token in sample_tokens_all:\n        scene_token = nusc.get('sample', sample_token)['scene_token']\n        scene_record = nusc.get('scene', scene_token)\n        if scene_record['name'] in splits[eval_split]:\n            sample_tokens.append(sample_token)\n\n    all_annotations = EvalBoxes()\n\n    # Load annotations and filter predictions and annotations.\n    tracking_id_set = set()\n    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):\n\n        sample = nusc.get('sample', sample_token)\n        sample_annotation_tokens = sample['anns']\n        # info = data_infos[sample_token]\n        sample_boxes = []\n        for sample_annotation_token in sample_annotation_tokens:\n\n            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)\n            if box_cls == DetectionMotionBox_modified:\n                # Get label name in detection task and filter unused labels.\n                if category_convert_type == 'detection_category':\n                    detection_name = category_to_detection_name(sample_annotation['category_name'])\n                elif category_convert_type == 'motion_category':\n                    detection_name = category_to_motion_name(sample_annotation['category_name'])\n                else:\n       
             raise NotImplementedError\n                if detection_name is None:\n                    continue\n                # Get attribute_name.\n                attr_tokens = sample_annotation['attribute_tokens']\n                attr_count = len(attr_tokens)\n                if attr_count == 0:\n                    attribute_name = ''\n                elif attr_count == 1:\n                    attribute_name = attribute_map[attr_tokens[0]]\n                else:\n                    raise Exception('Error: GT annotations must not have more than one attribute!')\n                instance_token = nusc.get('sample_annotation', sample_annotation['token'])['instance_token']\n\n                fut_traj_global = predict_helper.get_future_for_agent(instance_token, sample_token, seconds=4, in_agent_frame=False)\n                fut_traj_scence_centric = np.zeros((0,))\n                # if fut_traj_local.shape[0] > 0:\n                #     _, boxes, _ = nusc.get_sample_data(sample['data']['LIDAR_TOP'], selected_anntokens=[sample_annotation['token']])\n                #     box = boxes[0]\n                #     trans = box.center\n                #     rot = Quaternion(matrix=box.rotation_matrix)\n                #     fut_traj_scence_centric = convert_local_coords_to_global(fut_traj_local, trans, rot) \n                sample_boxes.append(\n                    box_cls(\n                        token=sample_annotation_token,\n                        sample_token=sample_token,\n                        translation=sample_annotation['translation'],\n                        size=sample_annotation['size'],\n                        rotation=sample_annotation['rotation'],\n                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],\n                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],\n                        detection_name=detection_name,\n                        detection_score=-1.0,  # GT samples do not have a score.\n                        attribute_name=attribute_name,\n                        visibility=sample_annotation['visibility_token'],\n                        index=index_map[sample_token],\n                        traj=fut_traj_global,\n                    )\n                )\n            elif box_cls == TrackingBox:\n                assert False\n            else:\n                raise NotImplementedError('Error: Invalid box_cls %s!' 
% box_cls)\n\n        all_annotations.add_boxes(sample_token, sample_boxes)\n\n    if verbose:\n        print(\"Loaded ground truth annotations for {} samples.\".format(len(all_annotations.sample_tokens)))\n\n    return all_annotations\n\ndef prediction_metrics(gt_box_match, pred_box):\n    pred_traj = np.array(pred_box.traj)\n    gt_traj_steps = gt_box_match.traj.reshape((-1, 2))\n    valid_steps = gt_traj_steps.shape[0]\n    if valid_steps <= 0:\n        return np.array([0]), np.array([0]), 0\n    nmodes = pred_traj.shape[0]\n    pred_steps = pred_traj.shape[1]\n    valid_mask = np.zeros((pred_steps, ))\n    gt_traj = np.zeros((pred_steps, 2))\n    gt_traj[:valid_steps, :] = gt_traj_steps\n    valid_mask[: valid_steps] = 1\n    pred_traj = torch.tensor(pred_traj[None])\n    gt_traj = torch.tensor(gt_traj[None])\n    valid_mask = torch.tensor(valid_mask[None])\n    ade_err, inds = min_ade(pred_traj, gt_traj, 1 - valid_mask)\n    fde_err, inds = min_fde(pred_traj, gt_traj, 1 - valid_mask)\n    mr_err = miss_rate(pred_traj, gt_traj, 1 - valid_mask, dist_thresh=2)\n    return ade_err.numpy(), fde_err.numpy(), mr_err.numpy()\n\n\ndef accumulate(gt_boxes: EvalBoxes,\n               pred_boxes: EvalBoxes,\n               class_name: str,\n               dist_fcn: Callable,\n               dist_th: float,\n               verbose: bool = False) -> DetectionMotionMetricData:\n    \"\"\"\n    Average Precision over predefined different recall thresholds for a single distance threshold.\n    The recall/conf thresholds and other raw metrics will be used in secondary metrics.\n    :param gt_boxes: Maps every sample_token to a list of its sample_annotations.\n    :param pred_boxes: Maps every sample_token to a list of its sample_results.\n    :param class_name: Class to compute AP on.\n    :param dist_fcn: Distance function used to match detections and ground truths.\n    :param dist_th: Distance threshold for a match.\n    :param verbose: If true, print debug messages.\n    :return: (average_prec, metrics). 
The average precision value and raw data for a number of metrics.\n    \"\"\"\n    # ---------------------------------------------\n    # Organize input and initialize accumulators.\n    # ---------------------------------------------\n\n    # Count the positives.\n    npos = len([1 for gt_box in gt_boxes.all if gt_box.detection_name == class_name])\n    if verbose:\n        print(\"Found {} GT of class {} out of {} total across {} samples.\".\n              format(npos, class_name, len(gt_boxes.all), len(gt_boxes.sample_tokens)))\n\n    # For missing classes in the GT, return a data structure corresponding to no predictions.\n    if npos == 0:\n        return DetectionMotionMetricData.no_predictions(), 0, 0, 0\n\n    # Organize the predictions in a single list.\n    pred_boxes_list = [box for box in pred_boxes.all if box.detection_name == class_name]\n    pred_confs = [box.detection_score for box in pred_boxes_list]\n\n    if verbose:\n        print(\"Found {} PRED of class {} out of {} total across {} samples.\".\n              format(len(pred_confs), class_name, len(pred_boxes.all), len(pred_boxes.sample_tokens)))\n\n    # Sort by confidence.\n    sortind = [i for (v, i) in sorted((v, i) for (i, v) in enumerate(pred_confs))][::-1]\n\n    # Do the actual matching.\n    tp = []  # Accumulator of true positives.\n    fp = []  # Accumulator of false positives.\n    conf = []  # Accumulator of confidences.\n\n    # match_data holds the extra metrics we calculate for each match.\n    match_data = {'trans_err': [],\n                  'vel_err': [],\n                  'scale_err': [],\n                  'orient_err': [],\n                  'attr_err': [],\n                  'conf': [],\n                  'min_ade_err': [],\n                  'min_fde_err': [],\n                  'miss_rate_err': []}\n\n    # ---------------------------------------------\n    # Match and accumulate match data.\n    # ---------------------------------------------\n\n    taken = set()  # Initially no gt bounding box is matched.\n    for ind in sortind:\n        pred_box = pred_boxes_list[ind]\n        min_dist = np.inf\n        match_gt_idx = None\n\n        for gt_idx, gt_box in enumerate(gt_boxes[pred_box.sample_token]):\n\n            # Find closest match among ground truth boxes\n            if gt_box.detection_name == class_name and not (pred_box.sample_token, gt_idx) in taken:\n                this_distance = dist_fcn(gt_box, pred_box)\n                if this_distance < min_dist:\n                    min_dist = this_distance\n                    match_gt_idx = gt_idx\n\n        # If the closest match is close enough according to threshold we have a match!\n        is_match = min_dist < dist_th\n\n        if is_match:\n            taken.add((pred_box.sample_token, match_gt_idx))\n\n            #  Update tp, fp and confs.\n            tp.append(1)\n            fp.append(0)\n            conf.append(pred_box.detection_score)\n\n            # Since it is a match, update match data also.\n            gt_box_match = gt_boxes[pred_box.sample_token][match_gt_idx]\n            \n            match_data['trans_err'].append(center_distance(gt_box_match, pred_box))\n            match_data['vel_err'].append(velocity_l2(gt_box_match, pred_box))\n            match_data['scale_err'].append(1 - scale_iou(gt_box_match, pred_box))\n\n            # Barrier orientation is only determined up to 180 degree. 
(For cones orientation is discarded later)\n            period = np.pi if class_name == 'barrier' else 2 * np.pi\n            match_data['orient_err'].append(yaw_diff(gt_box_match, pred_box, period=period))\n\n            match_data['attr_err'].append(1 - attr_acc(gt_box_match, pred_box))\n            minade, minfde, m_r = prediction_metrics(gt_box_match, pred_box)\n            \n            match_data['min_ade_err'].append(minade)\n            match_data['min_fde_err'].append(minfde)\n            match_data['miss_rate_err'].append(m_r)\n            match_data['conf'].append(pred_box.detection_score)\n\n        else:\n            # No match. Mark this as a false positive.\n            tp.append(0)\n            fp.append(1)\n            conf.append(pred_box.detection_score)\n\n    # Check if we have any matches. If not, just return a \"no predictions\" array.\n    if len(match_data['trans_err']) == 0:\n        return DetectionMotionMetricData.no_predictions(), 0, 0, 0\n\n    # ---------------------------------------------\n    # Calculate and interpolate precision and recall\n    # ---------------------------------------------\n\n    # Accumulate.\n    N_tp = np.sum(tp)\n    N_fp = np.sum(fp)\n    tp = np.cumsum(tp).astype(float)\n    fp = np.cumsum(fp).astype(float)\n    conf = np.array(conf)\n\n\n    # Calculate precision and recall.\n    prec = tp / (fp + tp)\n    rec = tp / float(npos)\n\n    rec_interp = np.linspace(0, 1, DetectionMotionMetricData.nelem)  # 101 steps, from 0% to 100% recall.\n    prec = np.interp(rec_interp, rec, prec, right=0)\n    conf = np.interp(rec_interp, rec, conf, right=0)\n    rec = rec_interp\n\n    # ---------------------------------------------\n    # Re-sample the match-data to match, prec, recall and conf.\n    # ---------------------------------------------\n\n    for key in match_data.keys():\n        if key == \"conf\":\n            continue  # Confidence is used as reference to align with fp and tp. So skip in this step.\n\n        else:\n            # For each match_data, we first calculate the accumulated mean.\n            tmp = cummean(np.array(match_data[key]))\n\n            # Then interpolate based on the confidences. (Note reversing since np.interp needs increasing arrays)\n            match_data[key] = np.interp(conf[::-1], match_data['conf'][::-1], tmp[::-1])[::-1]\n\n    # ---------------------------------------------\n    # Done. 
Instantiate MetricData and return\n    # ---------------------------------------------\n    return DetectionMotionMetricData(recall=rec,\n                               precision=prec,\n                               confidence=conf,\n                               trans_err=match_data['trans_err'],\n                               vel_err=match_data['vel_err'],\n                               scale_err=match_data['scale_err'],\n                               orient_err=match_data['orient_err'],\n                               attr_err=match_data['attr_err'],\n                               min_ade_err=match_data['min_ade_err'],\n                               min_fde_err=match_data['min_fde_err'],\n                               miss_rate_err=match_data['miss_rate_err']\n                               ), N_tp, N_fp, npos\n\n\n\ndef accumulate_motion(gt_boxes: EvalBoxes,\n               pred_boxes: EvalBoxes,\n               class_name: str,\n               dist_fcn: Callable,\n               traj_fcn: Callable,\n               dist_th: float,\n               traj_dist_th: float,\n               verbose: bool = False,\n               final_step: float = 12) -> DetectionMotionMetricData:\n    \"\"\"\n    Average Precision over predefined different recall thresholds for a single distance threshold.\n    The recall/conf thresholds and other raw metrics will be used in secondary metrics.\n    :param gt_boxes: Maps every sample_token to a list of its sample_annotations.\n    :param pred_boxes: Maps every sample_token to a list of its sample_results.\n    :param class_name: Class to compute AP on.\n    :param dist_fcn: Distance function used to match detections and ground truths.\n    :param dist_th: Distance threshold for a match.\n    :param verbose: If true, print debug messages.\n    :return: (average_prec, metrics). 
The average precision value and raw data for a number of metrics.\n    \"\"\"\n    # ---------------------------------------------\n    # Organize input and initialize accumulators.\n    # ---------------------------------------------\n\n    # Count the positives.\n    npos = len([1 for gt_box in gt_boxes.all if gt_box.detection_name == class_name])\n    if verbose:\n        print(\"Found {} GT of class {} out of {} total across {} samples.\".\n              format(npos, class_name, len(gt_boxes.all), len(gt_boxes.sample_tokens)))\n\n    # For missing classes in the GT, return a data structure corresponding to no predictions.\n    if npos == 0:\n        return DetectionMotionMetricData.no_predictions(), 0, 0, 0\n\n    # \n    # Organize the predictions in a single list.\n    pred_boxes_list = []\n    pred_confs = []\n\n    pred_boxes_list = [box for box in pred_boxes.all if box.detection_name == class_name]\n    pred_confs = [box.detection_score for box in pred_boxes_list]\n    # for box in pred_boxes.all:\n    #     if box.detection_name == class_name:\n    #         box.traj_scores = np.exp(box.traj_scores)\n    #         for i in range(len(box.traj_scores)):\n    #             box.traj_index = i\n    #             pred_boxes_list.append(box)\n    # pred_confs = [box.detection_score * box.traj_scores[box.traj_index]  for box in pred_boxes_list]\n\n    if verbose:\n        print(\"Found {} PRED of class {} out of {} total across {} samples.\".\n              format(len(pred_confs), class_name, len(pred_boxes.all), len(pred_boxes.sample_tokens)))\n\n    # Sort by confidence.\n    sortind = [i for (v, i) in sorted((v, i) for (i, v) in enumerate(pred_confs))][::-1]\n\n    # Do the actual matching.\n    tp = []  # Accumulator of true positives.\n    fp = []  # Accumulator of false positives.\n    conf = []  # Accumulator of confidences.\n\n    # match_data holds the extra metrics we calculate for each match.\n    match_data = {'trans_err': [],\n                  'vel_err': [],\n                  'scale_err': [],\n                  'orient_err': [],\n                  'attr_err': [],\n                  'conf': [],\n                  'min_ade_err': [],\n                  'min_fde_err': [],\n                  'miss_rate_err': []}\n\n    # ---------------------------------------------\n    # Match and accumulate match data.\n    # ---------------------------------------------\n\n    taken = set()  # Initially no gt bounding box is matched.\n    for ind in sortind:\n        pred_box = pred_boxes_list[ind]\n        min_dist = np.inf\n        match_gt_idx = None\n\n        for gt_idx, gt_box in enumerate(gt_boxes[pred_box.sample_token]):\n\n            # Find closest match among ground truth boxes\n            if gt_box.detection_name == class_name and not (pred_box.sample_token, gt_idx) in taken:\n                this_distance = dist_fcn(gt_box, pred_box)\n                if this_distance < min_dist:\n                    min_dist = this_distance\n                    match_gt_idx = gt_idx\n                    fde_distance = traj_fcn(gt_box, pred_box, final_step)\n        # If the closest match is close enough according to threshold we have a match!\n        is_match = min_dist < dist_th and fde_distance < traj_dist_th\n\n        if is_match:\n            taken.add((pred_box.sample_token, match_gt_idx))\n\n            #  Update tp, fp and confs.\n            tp.append(1)\n            fp.append(0)\n            conf.append(pred_box.detection_score)\n\n            # Since it is a match, update match 
data also.\n            gt_box_match = gt_boxes[pred_box.sample_token][match_gt_idx]\n            \n            match_data['trans_err'].append(center_distance(gt_box_match, pred_box))\n            match_data['vel_err'].append(velocity_l2(gt_box_match, pred_box))\n            match_data['scale_err'].append(1 - scale_iou(gt_box_match, pred_box))\n\n            # Barrier orientation is only determined up to 180 degree. (For cones orientation is discarded later)\n            period = np.pi if class_name == 'barrier' else 2 * np.pi\n            match_data['orient_err'].append(yaw_diff(gt_box_match, pred_box, period=period))\n\n            match_data['attr_err'].append(1 - attr_acc(gt_box_match, pred_box))\n            minade, minfde, m_r = prediction_metrics(gt_box_match, pred_box)\n            \n            match_data['min_ade_err'].append(minade)\n            match_data['min_fde_err'].append(minfde)\n            match_data['miss_rate_err'].append(m_r)\n            match_data['conf'].append(pred_box.detection_score)\n\n        else:\n            # No match. Mark this as a false positive.\n            tp.append(0)\n            fp.append(1)\n            conf.append(pred_box.detection_score)\n            # conf.append(pred_box.detection_score * pred_box.traj_scores[pred_box.traj_index])\n    # \n    # Check if we have any matches. If not, just return a \"no predictions\" array.\n    if len(match_data['trans_err']) == 0:\n        return DetectionMotionMetricData.no_predictions(), 0, 0, 0\n\n    # ---------------------------------------------\n    # Calculate and interpolate precision and recall\n    # ---------------------------------------------\n\n    # Accumulate.\n    N_tp = np.sum(tp)\n    N_fp = np.sum(fp)\n    tp = np.cumsum(tp).astype(float)\n    fp = np.cumsum(fp).astype(float)\n    conf = np.array(conf)\n\n    # Calculate precision and recall.\n    prec = tp / (fp + tp)\n    rec = tp / float(npos)\n\n\n\n    rec_interp = np.linspace(0, 1, DetectionMotionMetricData.nelem)  # 101 steps, from 0% to 100% recall.\n    prec = np.interp(rec_interp, rec, prec, right=0)\n    conf = np.interp(rec_interp, rec, conf, right=0)\n    rec = rec_interp\n\n    # ---------------------------------------------\n    # Re-sample the match-data to match, prec, recall and conf.\n    # ---------------------------------------------\n\n    for key in match_data.keys():\n        if key == \"conf\":\n            continue  # Confidence is used as reference to align with fp and tp. So skip in this step.\n\n        else:\n            # For each match_data, we first calculate the accumulated mean.\n            tmp = cummean(np.array(match_data[key]))\n\n            # Then interpolate based on the confidences. (Note reversing since np.interp needs increasing arrays)\n            match_data[key] = np.interp(conf[::-1], match_data['conf'][::-1], tmp[::-1])[::-1]\n\n    # ---------------------------------------------\n    # Done. 
Instantiate MetricData and return\n    # ---------------------------------------------\n    return DetectionMotionMetricData(recall=rec,\n                               precision=prec,\n                               confidence=conf,\n                               trans_err=match_data['trans_err'],\n                               vel_err=match_data['vel_err'],\n                               scale_err=match_data['scale_err'],\n                               orient_err=match_data['orient_err'],\n                               attr_err=match_data['attr_err'],\n                               min_ade_err=match_data['min_ade_err'],\n                               min_fde_err=match_data['min_fde_err'],\n                               miss_rate_err=match_data['miss_rate_err']\n                               ), N_tp, N_fp, npos"
  },
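  {
    "path": "examples/motion_metrics_sketch.py",
    "content": "\"\"\"Illustrative sketch only -- this file and its helper names are hypothetical and are NOT part of the original repository.\n\nIt shows, on toy data, the trajectory metrics that the evaluation code feeds into `prediction_metrics`:\nminADE, minFDE and miss rate over K predicted modes with a per-step validity mask. The real evaluation\nimports its own `min_ade` / `min_fde` / `miss_rate` utilities; the standalone re-implementations below\nare only meant to make the definitions concrete.\n\"\"\"\nimport numpy as np\n\n\ndef min_ade_sketch(pred: np.ndarray, gt: np.ndarray, valid: np.ndarray) -> float:\n    \"\"\"Minimum-over-modes average displacement error.\n\n    pred: (K, T, 2) predicted modes, gt: (T, 2) ground truth, valid: (T,) 0/1 mask of observed steps.\n    \"\"\"\n    diff = np.linalg.norm(pred - gt[None], axis=-1)  # (K, T) per-step distances.\n    per_mode = (diff * valid[None]).sum(-1) / max(valid.sum(), 1)\n    return float(per_mode.min())\n\n\ndef min_fde_sketch(pred: np.ndarray, gt: np.ndarray, valid: np.ndarray) -> float:\n    \"\"\"Minimum-over-modes displacement error at the last valid step.\"\"\"\n    last = int(np.nonzero(valid)[0][-1])\n    return float(np.linalg.norm(pred[:, last] - gt[last], axis=-1).min())\n\n\ndef miss_rate_sketch(pred: np.ndarray, gt: np.ndarray, valid: np.ndarray, dist_thresh: float = 2.0) -> float:\n    \"\"\"1.0 if no mode ends within dist_thresh meters of the GT endpoint, else 0.0 (single-agent toy version).\"\"\"\n    return float(min_fde_sketch(pred, gt, valid) > dist_thresh)\n\n\nif __name__ == '__main__':\n    rng = np.random.default_rng(0)\n    gt = np.cumsum(rng.normal(size=(12, 2)), axis=0)           # A toy 12-step ground-truth trajectory.\n    pred = gt[None] + rng.normal(scale=0.5, size=(6, 12, 2))   # Six noisy predicted modes.\n    valid = np.ones(12)\n    valid[8:] = 0                                               # Pretend the last steps are unobserved.\n    print('minADE:', min_ade_sketch(pred, gt, valid))\n    print('minFDE:', min_fde_sketch(pred, gt, valid))\n    print('miss rate:', miss_rate_sketch(pred, gt, valid))\n"
  },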
  {
    "path": "mmdet3d/datasets/evals/map_api.py",
    "content": "# nuScenes dev-kit.\n# Code written by Sergi Adipraja Widjaja, 2019.\n# + Map mask by Kiwoo Shin, 2019.\n# + Methods operating on NuScenesMap and NuScenes by Holger Caesar, 2019.\n\nimport json\nimport os\nimport random\nfrom typing import Dict, List, Tuple, Optional, Union\n\nimport cv2\nimport math\nimport descartes\nimport matplotlib.gridspec as gridspec\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom PIL import Image\nfrom matplotlib.axes import Axes\nfrom matplotlib.figure import Figure\nfrom matplotlib.patches import Rectangle, Arrow\nfrom mpl_toolkits.axes_grid1.inset_locator import mark_inset\nfrom pyquaternion import Quaternion\nfrom shapely import affinity\nfrom shapely.geometry import Polygon, MultiPolygon, LineString, Point, box\nfrom tqdm import tqdm\n\nfrom nuscenes.map_expansion.arcline_path_utils import discretize_lane, ArcLinePath\nfrom nuscenes.map_expansion.bitmap import BitMap\nfrom nuscenes.nuscenes import NuScenes\nfrom nuscenes.utils.geometry_utils import view_points\nfrom functools import partial\n\n# Recommended style to use as the plots will show grids.\nplt.style.use('seaborn-whitegrid')\n\n# Define a map geometry type for polygons and lines.\nGeometry = Union[Polygon, LineString]\n\nlocations = ['singapore-onenorth', 'singapore-hollandvillage', 'singapore-queenstown', 'boston-seaport']\n\n\nclass NuScenesMap:\n    \"\"\"\n    NuScenesMap database class for querying and retrieving information from the semantic maps.\n    Before using this class please use the provided tutorial `map_expansion_tutorial.ipynb`.\n\n    Below you can find the map origins (south western corner, in [lat, lon]) for each of the 4 maps in nuScenes:\n    boston-seaport: [42.336849169438615, -71.05785369873047]\n    singapore-onenorth: [1.2882100868743724, 103.78475189208984]\n    singapore-hollandvillage: [1.2993652317780957, 103.78217697143555]\n    singapore-queenstown: [1.2782562240223188, 103.76741409301758]\n\n    The dimensions of the maps are as follows ([width, height] in meters):\n    singapore-onenorth: [1585.6, 2025.0]\n    singapore-hollandvillage: [2808.3, 2922.9]\n    singapore-queenstown: [3228.6, 3687.1]\n    boston-seaport: [2979.5, 2118.1]\n    The rasterized semantic maps (e.g. singapore-onenorth.png) published with nuScenes v1.0 have a scale of 10px/m,\n    hence the above numbers are the image dimensions divided by 10.\n\n    We use the same WGS 84 Web Mercator (EPSG:3857) projection as Google Maps/Earth.\n    \"\"\"\n    def __init__(self,\n                 dataroot: str = '/data/sets/nuscenes',\n                 map_name: str = 'singapore-onenorth'):\n        \"\"\"\n        Loads the layers, create reverse indices and shortcuts, initializes the explorer class.\n        :param dataroot: Path to the layers in the form of a .json file.\n        :param map_name: Which map out of `singapore-onenorth`, `singepore-hollandvillage`, `singapore-queenstown`,\n        `boston-seaport` that we want to load.\n        \"\"\"\n        assert map_name in locations, 'Error: Unknown map name %s!' 
% map_name\n\n        self.dataroot = dataroot\n        self.map_name = map_name\n\n        self.geometric_layers = ['polygon', 'line', 'node']\n\n        # These are the non-geometric layers which have polygons as the geometric descriptors.\n        self.non_geometric_polygon_layers = ['drivable_area', 'road_segment', 'road_block', 'lane', 'ped_crossing',\n                                             'walkway', 'stop_line', 'carpark_area']\n\n        # We want to be able to search for lane connectors, but not render them.\n        self.lookup_polygon_layers = self.non_geometric_polygon_layers + ['lane_connector']\n\n        # These are the non-geometric layers which have line strings as the geometric descriptors.\n        self.non_geometric_line_layers = ['road_divider', 'lane_divider', 'traffic_light']\n        self.non_geometric_layers = self.non_geometric_polygon_layers + self.non_geometric_line_layers\n        self.layer_names = self.geometric_layers + self.lookup_polygon_layers + self.non_geometric_line_layers\n\n        # Load the selected map.\n        self.json_fname = os.path.join(self.dataroot, 'maps', 'expansion', '{}.json'.format(self.map_name))\n        with open(self.json_fname, 'r') as fh:\n            self.json_obj = json.load(fh)\n\n        # Parse the map version and print an error for deprecated maps.\n        if 'version' in self.json_obj:\n            self.version = self.json_obj['version']\n        else:\n            self.version = '1.0'\n        if self.version < '1.3':\n            raise Exception('Error: You are using an outdated map version (%s)! '\n                            'Please go to https://www.nuscenes.org/download to download the latest map!' % self.version)\n\n        self.canvas_edge = self.json_obj['canvas_edge']\n        self._load_layers()\n        self._make_token2ind()\n        self._make_shortcuts()\n\n        self.explorer = NuScenesMapExplorer(self)\n\n    def _load_layer(self, layer_name: str) -> List[dict]:\n        \"\"\"\n        Returns a list of records corresponding to the layer name.\n        :param layer_name: Name of the layer that will be loaded.\n        :return: A list of records corresponding to a layer.\n        \"\"\"\n        return self.json_obj[layer_name]\n\n    def _load_layer_dict(self, layer_name: str) -> Dict[str, Union[dict, list]]:\n        \"\"\"\n        Returns a dict of records corresponding to the layer name.\n        :param layer_name: Name of the layer that will be loaded.\n        :return: A dict of records corresponding to a layer.\n        \"\"\"\n        return self.json_obj[layer_name]\n\n    def _load_layers(self) -> None:\n        \"\"\" Loads each available layer. 
\"\"\"\n\n        # Explicit assignment of layers are necessary to help the IDE determine valid class members.\n        self.polygon = self._load_layer('polygon')\n        self.line = self._load_layer('line')\n        self.node = self._load_layer('node')\n        self.drivable_area = self._load_layer('drivable_area')\n        self.road_segment = self._load_layer('road_segment')\n        self.road_block = self._load_layer('road_block')\n        self.lane = self._load_layer('lane')\n        self.ped_crossing = self._load_layer('ped_crossing')\n        self.walkway = self._load_layer('walkway')\n        self.stop_line = self._load_layer('stop_line')\n        self.carpark_area = self._load_layer('carpark_area')\n        self.road_divider = self._load_layer('road_divider')\n        self.lane_divider = self._load_layer('lane_divider')\n        self.traffic_light = self._load_layer('traffic_light')\n\n        self.arcline_path_3: Dict[str, List[dict]] = self._load_layer_dict('arcline_path_3')\n        self.connectivity: Dict[str, dict] = self._load_layer_dict('connectivity')\n        self.lane_connector = self._load_layer('lane_connector')\n\n    def _make_token2ind(self) -> None:\n        \"\"\" Store the mapping from token to layer index for each layer. \"\"\"\n        self._token2ind = dict()\n        for layer_name in self.layer_names:\n            self._token2ind[layer_name] = dict()\n\n            for ind, member in enumerate(getattr(self, layer_name)):\n                self._token2ind[layer_name][member['token']] = ind\n\n    def _make_shortcuts(self) -> None:\n        \"\"\" Makes the record shortcuts. \"\"\"\n\n        # Makes a shortcut between non geometric records to their nodes.\n        for layer_name in self.non_geometric_polygon_layers:\n            if layer_name == 'drivable_area':  # Drivable area has more than one geometric representation.\n                pass\n            else:\n                for record in self.__dict__[layer_name]:\n                    polygon_obj = self.get('polygon', record['polygon_token'])\n                    record['exterior_node_tokens'] = polygon_obj['exterior_node_tokens']\n                    record['holes'] = polygon_obj['holes']\n\n        for layer_name in self.non_geometric_line_layers:\n            for record in self.__dict__[layer_name]:\n                record['node_tokens'] = self.get('line', record['line_token'])['node_tokens']\n\n        # Makes a shortcut between stop lines to their cues, there's different cues for different types of stop line.\n        # Refer to `_get_stop_line_cue()` for details.\n        for record in self.stop_line:\n            cue = self._get_stop_line_cue(record)\n            record['cue'] = cue\n\n        # Makes a shortcut between lanes to their lane divider segment nodes.\n        for record in self.lane:\n            record['left_lane_divider_segment_nodes'] = [self.get('node', segment['node_token']) for segment in\n                                                         record['left_lane_divider_segments']]\n            record['right_lane_divider_segment_nodes'] = [self.get('node', segment['node_token']) for segment in\n                                                          record['right_lane_divider_segments']]\n\n    def _get_stop_line_cue(self, stop_line_record: dict) -> List[dict]:\n        \"\"\"\n        Get the different cues for different types of stop lines.\n        :param stop_line_record: A single stop line record.\n        :return: The cue for that stop line.\n        \"\"\"\n        if 
stop_line_record['stop_line_type'] in ['PED_CROSSING', 'TURN_STOP']:\n            return [self.get('ped_crossing', token) for token in stop_line_record['ped_crossing_tokens']]\n        elif stop_line_record['stop_line_type'] in ['STOP_SIGN', 'YIELD']:\n            return []\n        elif stop_line_record['stop_line_type'] == 'TRAFFIC_LIGHT':\n            return [self.get('traffic_light', token) for token in stop_line_record['traffic_light_tokens']]\n\n    def get(self, layer_name: str, token: str) -> dict:\n        \"\"\"\n        Returns a record from the layer in constant runtime.\n        :param layer_name: Name of the layer that we are interested in.\n        :param token: Token of the record.\n        :return: A single layer record.\n        \"\"\"\n        assert layer_name in self.layer_names, \"Layer {} not found\".format(layer_name)\n\n        return getattr(self, layer_name)[self.getind(layer_name, token)]\n\n    def getind(self, layer_name: str, token: str) -> int:\n        \"\"\"\n        This returns the index of the record in a layer in constant runtime.\n        :param layer_name: Name of the layer we are interested in.\n        :param token: Token of the record.\n        :return: The index of the record in the layer, layer is an array.\n        \"\"\"\n        return self._token2ind[layer_name][token]\n\n    def render_record(self,\n                      layer_name: str,\n                      token: str,\n                      alpha: float = 0.5,\n                      figsize: Tuple[float, float] = None,\n                      other_layers: List[str] = None,\n                      bitmap: Optional[BitMap] = None) -> Tuple[Figure, Tuple[Axes, Axes]]:\n        \"\"\"\n         Render a single map record. By default will also render 3 layers which are `drivable_area`, `lane`,\n         and `walkway` unless specified by `other_layers`.\n         :param layer_name: Name of the layer that we are interested in.\n         :param token: Token of the record that you want to render.\n         :param alpha: The opacity of each layer that gets rendered.\n         :param figsize: Size of the whole figure.\n         :param other_layers: What other layers to render aside from the one specified in `layer_name`.\n         :param bitmap: Optional BitMap object to render below the other map layers.\n         :return: The matplotlib figure and axes of the rendered layers.\n         \"\"\"\n        return self.explorer.render_record(layer_name, token, alpha,\n                                           figsize=figsize, other_layers=other_layers, bitmap=bitmap)\n\n    def render_layers(self,\n                      layer_names: List[str],\n                      alpha: float = 0.5,\n                      figsize: Union[None, float, Tuple[float, float]] = None,\n                      tokens: List[str] = None,\n                      bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Render a list of layer names.\n        :param layer_names: A list of layer names.\n        :param alpha: The opacity of each layer that gets rendered.\n        :param figsize: Size of the whole figure.\n        :param tokens: Optional list of tokens to render. 
None means all tokens are rendered.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: The matplotlib figure and axes of the rendered layers.\n        \"\"\"\n        return self.explorer.render_layers(layer_names, alpha,\n                                           figsize=figsize, tokens=tokens, bitmap=bitmap)\n\n    def render_map_patch(self,\n                         box_coords: Tuple[float, float, float, float],\n                         layer_names: List[str] = None,\n                         alpha: float = 0.5,\n                         figsize: Tuple[int, int] = (15, 15),\n                         render_egoposes_range: bool = True,\n                         render_legend: bool = True,\n                         bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Renders a rectangular patch specified by `box_coords`. By default renders all layers.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param layer_names: All the non geometric layers that we want to render.\n        :param alpha: The opacity of each layer.\n        :param figsize: Size of the whole figure.\n        :param render_egoposes_range: Whether to render a rectangle around all ego poses.\n        :param render_legend: Whether to render the legend of map layers.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: The matplotlib figure and axes of the rendered layers.\n        \"\"\"\n        return self.explorer.render_map_patch(box_coords, layer_names=layer_names, alpha=alpha, figsize=figsize,\n                                              render_egoposes_range=render_egoposes_range,\n                                              render_legend=render_legend, bitmap=bitmap)\n\n    def render_map_in_image(self,\n                            nusc: NuScenes,\n                            sample_token: str,\n                            camera_channel: str = 'CAM_FRONT',\n                            alpha: float = 0.3,\n                            patch_radius: float = 10000,\n                            min_polygon_area: float = 1000,\n                            render_behind_cam: bool = True,\n                            render_outside_im: bool = True,\n                            layer_names: List[str] = None,\n                            verbose: bool = True,\n                            out_path: str = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Render a nuScenes camera image and overlay the polygons for the specified map layers.\n        Note that the projections are not always accurate as the localization is in 2d.\n        :param nusc: The NuScenes instance to load the image from.\n        :param sample_token: The image's corresponding sample_token.\n        :param camera_channel: Camera channel name, e.g. 'CAM_FRONT'.\n        :param alpha: The transparency value of the layers to render in [0, 1].\n        :param patch_radius: The radius in meters around the ego car in which to select map records.\n        :param min_polygon_area: Minimum area a polygon needs to have to be rendered.\n        :param render_behind_cam: Whether to render polygons where any point is behind the camera.\n        :param render_outside_im: Whether to render polygons where any point is outside the image.\n        :param layer_names: The names of the layers to render, e.g. 
['lane'].\n            If set to None, the recommended setting will be used.\n        :param verbose: Whether to print to stdout.\n        :param out_path: Optional path to save the rendered figure to disk.\n        \"\"\"\n        return self.explorer.render_map_in_image(\n            nusc, sample_token, camera_channel=camera_channel, alpha=alpha,\n            patch_radius=patch_radius, min_polygon_area=min_polygon_area,\n            render_behind_cam=render_behind_cam, render_outside_im=render_outside_im,\n            layer_names=layer_names, verbose=verbose, out_path=out_path)\n\n    def get_map_mask_in_image(self,\n                              nusc: NuScenes,\n                              sample_token: str,\n                              camera_channel: str = 'CAM_FRONT',\n                              alpha: float = 0.3,\n                              patch_radius: float = 10000,\n                              min_polygon_area: float = 1000,\n                              render_behind_cam: bool = True,\n                              render_outside_im: bool = True,\n                              layer_names: List[str] = None,\n                              verbose: bool = False,\n                              out_path: str = None):\n        \"\"\"\n        Render a nuScenes camera image and overlay the polygons for the specified map layers.\n        Note that the projections are not always accurate as the localization is in 2d.\n        :param nusc: The NuScenes instance to load the image from.\n        :param sample_token: The image's corresponding sample_token.\n        :param camera_channel: Camera channel name, e.g. 'CAM_FRONT'.\n        :param alpha: The transparency value of the layers to render in [0, 1].\n        :param patch_radius: The radius in meters around the ego car in which to select map records.\n        :param min_polygon_area: Minimum area a polygon needs to have to be rendered.\n        :param render_behind_cam: Whether to render polygons where any point is behind the camera.\n        :param render_outside_im: Whether to render polygons where any point is outside the image.\n        :param layer_names: The names of the layers to render, e.g. 
['lane'].\n            If set to None, the recommended setting will be used.\n        :param verbose: Whether to print to stdout.\n        :param out_path: Optional path to save the rendered figure to disk.\n        \"\"\"\n        return self.explorer.get_map_mask_in_image(\n            nusc, sample_token, camera_channel=camera_channel, alpha=alpha,\n            patch_radius=patch_radius, min_polygon_area=min_polygon_area,\n            render_behind_cam=render_behind_cam, render_outside_im=render_outside_im,\n            layer_names=layer_names, verbose=verbose, out_path=out_path)\n\n    def render_egoposes_on_fancy_map(self,\n                                     nusc: NuScenes,\n                                     scene_tokens: List = None,\n                                     verbose: bool = True,\n                                     out_path: str = None,\n                                     render_egoposes: bool = True,\n                                     render_egoposes_range: bool = True,\n                                     render_legend: bool = True,\n                                     bitmap: Optional[BitMap] = None) -> Tuple[np.ndarray, Figure, Axes]:\n        \"\"\"\n        Renders each ego pose of a list of scenes on the map (around 40 poses per scene).\n        This method is heavily inspired by NuScenes.render_egoposes_on_map(), but uses the map expansion pack maps.\n        :param nusc: The NuScenes instance to load the ego poses from.\n        :param scene_tokens: Optional list of scene tokens corresponding to the current map location.\n        :param verbose: Whether to show status messages and progress bar.\n        :param out_path: Optional path to save the rendered figure to disk.\n        :param render_egoposes: Whether to render ego poses.\n        :param render_egoposes_range: Whether to render a rectangle around all ego poses.\n        :param render_legend: Whether to render the legend of map layers.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: <np.float32: n, 2>. Returns a matrix with n ego poses in global map coordinates.\n        \"\"\"\n        return self.explorer.render_egoposes_on_fancy_map(nusc, scene_tokens=scene_tokens,\n                                                          verbose=verbose, out_path=out_path,\n                                                          render_egoposes=render_egoposes,\n                                                          render_egoposes_range=render_egoposes_range,\n                                                          render_legend=render_legend, bitmap=bitmap)\n\n    def render_centerlines(self,\n                           resolution_meters: float = 0.5,\n                           figsize: Union[None, float, Tuple[float, float]] = None,\n                           bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Render the centerlines of all lanes and lane connectors.\n        :param resolution_meters: How finely to discretize the lane. 
Smaller values ensure curved\n            lanes are properly represented.\n        :param figsize: Size of the figure.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        \"\"\"\n        return self.explorer.render_centerlines(resolution_meters=resolution_meters, figsize=figsize, bitmap=bitmap)\n\n    def render_map_mask(self,\n                        patch_box: Tuple[float, float, float, float],\n                        patch_angle: float,\n                        layer_names: List[str] = None,\n                        canvas_size: Tuple[int, int] = (100, 100),\n                        figsize: Tuple[int, int] = (15, 15),\n                        n_row: int = 2) -> Tuple[Figure, List[Axes]]:\n        \"\"\"\n        Render map mask of the patch specified by patch_box and patch_angle.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n        :param layer_names: A list of layer names to be returned.\n        :param canvas_size: Size of the output mask (h, w).\n        :param figsize: Size of the figure.\n        :param n_row: Number of rows with plots.\n        :return: The matplotlib figure and a list of axes of the rendered layers.\n        \"\"\"\n        return self.explorer.render_map_mask(patch_box, patch_angle,\n                                             layer_names=layer_names, canvas_size=canvas_size,\n                                             figsize=figsize, n_row=n_row)\n\n    def get_map_mask(self,\n                     patch_box: Optional[Tuple[float, float, float, float]],\n                     patch_angle: float,\n                     layer_names: List[str] = None,\n                     canvas_size: Optional[Tuple[int, int]] = (100, 100)) -> np.ndarray:\n        \"\"\"\n        Return list of map mask layers of the specified patch.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width]. If None, this plots the entire map.\n        :param patch_angle: Patch orientation in degrees. North-facing corresponds to 0.\n        :param layer_names: A list of layer names to be extracted, or None for all non-geometric layers.\n        :param canvas_size: Size of the output mask (h, w). 
If None, we use the default resolution of 10px/m.\n        :return: Stacked numpy array of size [c x h x w] with c channels and the same width/height as the canvas.\n        \"\"\"\n        return self.explorer.get_map_mask(patch_box, patch_angle, layer_names=layer_names, canvas_size=canvas_size)\n\n    def get_map_geom(self,\n                     patch_box: Tuple[float, float, float, float],\n                     patch_angle: float,\n                     layer_names: List[str]) -> List[Tuple[str, List[Geometry]]]:\n        \"\"\"\n        Returns a list of geometries in the specified patch_box.\n        These are unscaled, but aligned with the patch angle.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n                            North-facing corresponds to 0.\n        :param layer_names: A list of layer names to be extracted, or None for all non-geometric layers.\n        :return: List of layer names and their corresponding geometries.\n        \"\"\"\n        return self.explorer.get_map_geom(patch_box, patch_angle, layer_names)\n\n    def get_records_in_patch(self,\n                             box_coords: Tuple[float, float, float, float],\n                             layer_names: List[str] = None,\n                             mode: str = 'intersect') -> Dict[str, List[str]]:\n        \"\"\"\n        Get all the record token that intersects or is within a particular rectangular patch.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param layer_names: Names of the layers that we want to retrieve in a particular patch. By default will always\n        look at the all non geometric layers.\n        :param mode: \"intersect\" will return all non geometric records that intersects the patch, \"within\" will return\n        all non geometric records that are within the patch.\n        :return: Dictionary of layer_name - tokens pairs.\n        \"\"\"\n        return self.explorer.get_records_in_patch(box_coords, layer_names=layer_names, mode=mode)\n\n    def is_record_in_patch(self,\n                           layer_name: str,\n                           token: str,\n                           box_coords: Tuple[float, float, float, float],\n                           mode: str = 'intersect') -> bool:\n        \"\"\"\n        Query whether a particular record is in a rectangular patch\n        :param layer_name: The layer name of the record.\n        :param token: The record token.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param mode: \"intersect\" means it will return True if the geometric object intersects the patch, \"within\" will\n                     return True if the geometric object is within the patch.\n        :return: Boolean value on whether a particular record intersects or within a particular patch.\n        \"\"\"\n        return self.explorer.is_record_in_patch(layer_name, token, box_coords, mode=mode)\n\n    def layers_on_point(self, x: float, y: float, layer_names: List[str] = None) -> Dict[str, str]:\n        \"\"\"\n        Returns all the polygonal layers that a particular point is on.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :param layer_names: The names of the layers to search for.\n        :return: All the polygonal layers that a particular point is on. 
{<layer name>: <list of tokens>}\n        \"\"\"\n        return self.explorer.layers_on_point(x, y, layer_names=layer_names)\n\n    def record_on_point(self, x: float, y: float, layer_name: str) -> str:\n        \"\"\"\n        Query what record of a layer a particular point is on.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :param layer_name: The non geometric polygonal layer name that we are interested in.\n        :return: The first token of a layer a particular point is on or '' if no layer is found.\n        \"\"\"\n        return self.explorer.record_on_point(x, y, layer_name)\n\n    def extract_polygon(self, polygon_token: str) -> Polygon:\n        \"\"\"\n        Construct a shapely Polygon object out of a polygon token.\n        :param polygon_token: The token of the polygon record.\n        :return: The polygon wrapped in a shapely Polygon object.\n        \"\"\"\n        return self.explorer.extract_polygon(polygon_token)\n\n    def extract_line(self, line_token: str) -> LineString:\n        \"\"\"\n        Construct a shapely LineString object out of a line token.\n        :param line_token: The token of the line record.\n        :return: The line wrapped in a LineString object.\n        \"\"\"\n        return self.explorer.extract_line(line_token)\n\n    def get_bounds(self, layer_name: str, token: str) -> Tuple[float, float, float, float]:\n        \"\"\"\n        Get the bounds of the geometric object that corresponds to a non geometric record.\n        :param layer_name: Name of the layer that we are interested in.\n        :param token: Token of the record.\n        :return: min_x, min_y, max_x, max_y of of the line representation.\n        \"\"\"\n        return self.explorer.get_bounds(layer_name, token)\n\n    def get_records_in_radius(self, x: float, y: float, radius: float,\n                              layer_names: List[str], mode: str = 'intersect') -> Dict[str, List[str]]:\n        \"\"\"\n        Get all the record tokens that intersect a square patch of side length 2*radius centered on (x,y).\n        :param x: X-coordinate in global frame.\n        :param y: y-coordinate in global frame.\n        :param radius: All records within radius meters of point (x, y) will be returned.\n        :param layer_names: Names of the layers that we want to retrieve. By default will always\n        look at the all non geometric layers.\n        :param mode: \"intersect\" will return all non geometric records that intersects the patch, \"within\" will return\n        all non geometric records that are within the patch.\n        :return: Dictionary of layer_name - tokens pairs.\n        \"\"\"\n\n        patch = (x - radius, y - radius, x + radius, y + radius)\n        return self.explorer.get_records_in_patch(patch, layer_names, mode=mode)\n\n    def discretize_centerlines(self, resolution_meters: float) -> List[np.array]:\n        \"\"\"\n        Discretize the centerlines of lanes and lane connectors.\n        :param resolution_meters: How finely to discretize the lane. 
Smaller values ensure curved\n            lanes are properly represented.\n        :return: A list of np.arrays with x, y and z values for each point.\n        \"\"\"\n        pose_lists = []\n        for lane in self.lane + self.lane_connector:\n            my_lane = self.arcline_path_3.get(lane['token'], [])\n            discretized = np.array(discretize_lane(my_lane, resolution_meters))\n            pose_lists.append(discretized)\n\n        return pose_lists\n\n    def discretize_lanes(self, tokens: List[str],\n                         resolution_meters: float) -> Dict[str, List[Tuple[float, float, float]]]:\n        \"\"\"\n        Discretizes a list of lane/lane connector tokens.\n        :param tokens: List of lane and/or lane connector record tokens. Can be retrieved with\n            get_records_in_radius or get_records_in_patch.\n        :param resolution_meters: How finely to discretize the splines.\n        :return: Mapping from lane/lane connector token to sequence of poses along the lane.\n        \"\"\"\n\n        return {ID: discretize_lane(self.arcline_path_3.get(ID, []), resolution_meters) for ID in tokens}\n\n    def _get_connected_lanes(self, lane_token: str, incoming_outgoing: str) -> List[str]:\n        \"\"\"\n        Helper for getting the lanes connected to a given lane\n        :param lane_token: Token for the lane.\n        :param incoming_outgoing: Whether to get incoming or outgoing lanes\n        :return: List of lane tokens this lane is connected to.\n        \"\"\"\n\n        if lane_token not in self.connectivity:\n            raise ValueError(f\"{lane_token} is not a valid lane.\")\n\n        return self.connectivity[lane_token][incoming_outgoing]\n\n    def get_outgoing_lane_ids(self, lane_token: str) -> List[str]:\n        \"\"\"\n        Get the out-going lanes.\n        :param lane_token: Token for the lane.\n        :return: List of lane tokens that start at the end of this lane.\n        \"\"\"\n\n        return self._get_connected_lanes(lane_token, 'outgoing')\n\n    def get_incoming_lane_ids(self, lane_token: str) -> List[str]:\n        \"\"\"\n        Get the incoming lanes.\n        :param lane_token: Token for the lane.\n        :return: List of lane tokens that end at the start of this lane.\n        \"\"\"\n\n        return self._get_connected_lanes(lane_token, 'incoming')\n\n    def get_arcline_path(self, lane_token: str) -> List[ArcLinePath]:\n        \"\"\"\n        Get the arcline path representation for a lane.\n        Note: This function was previously called `get_lane()`, but renamed to avoid confusion between lanes and\n              arcline paths.\n        :param lane_token: Token for the lane.\n        :return: Arc line path representation of the lane.\n        \"\"\"\n\n        arcline_path = self.arcline_path_3.get(lane_token)\n        if not arcline_path:\n            raise ValueError(f'Error: Lane with token {lane_token} does not have a valid arcline path!')\n\n        return arcline_path\n\n    def get_closest_lane(self, x: float, y: float, radius: float = 5) -> str:\n        \"\"\"\n        Get closest lane id within a radius of query point. 
The distance from a point (x, y) to a lane is\n        the minimum l2 distance from (x, y) to a point on the lane.\n        :param x: X coordinate in global coordinate frame.\n        :param y: Y Coordinate in global coordinate frame.\n        :param radius: Radius around point to consider.\n        :return: Lane id of closest lane within radius.\n        \"\"\"\n\n        lanes = self.get_records_in_radius(x, y, radius, ['lane', 'lane_connector'])\n        lanes = lanes['lane'] + lanes['lane_connector']\n\n        discrete_points = self.discretize_lanes(lanes, 0.5)\n\n        current_min = np.inf\n\n        min_id = \"\"\n        for lane_id, points in discrete_points.items():\n\n            distance = np.linalg.norm(np.array(points)[:, :2] - [x, y], axis=1).min()\n            if distance <= current_min:\n                current_min = distance\n                min_id = lane_id\n\n        return min_id\n\n    def render_next_roads(self,\n                          x: float,\n                          y: float,\n                          alpha: float = 0.5,\n                          figsize: Union[None, float, Tuple[float, float]] = None,\n                          bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Renders the possible next roads from a point of interest.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :param alpha: The opacity of each layer that gets rendered.\n        :param figsize: Size of the whole figure.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        \"\"\"\n        return self.explorer.render_next_roads(x, y, alpha, figsize=figsize, bitmap=bitmap)\n\n    def get_next_roads(self, x: float, y: float) -> Dict[str, List[str]]:\n        \"\"\"\n        Get the possible next roads from a point of interest.\n        Returns road_segment, road_block and lane.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :return: Dictionary of layer_name - tokens pairs.\n        \"\"\"\n        # Filter out irrelevant layers.\n        road_layers = ['road_segment', 'road_block', 'lane']\n        layers = self.explorer.layers_on_point(x, y)\n        rel_layers = {layer: layers[layer] for layer in road_layers}\n\n        # Pick most fine-grained road layer (lane, road_block, road_segment) object that contains the point.\n        rel_layer = None\n        rel_token = None\n        for layer in road_layers[::-1]:\n            if rel_layers[layer] != '':\n                rel_layer = layer\n                rel_token = rel_layers[layer]\n                break\n        assert rel_layer is not None, 'Error: No suitable layer in the specified point location!'\n\n        # Get all records that overlap with the bounding box of the selected road.\n        box_coords = self.explorer.get_bounds(rel_layer, rel_token)\n        intersect = self.explorer.get_records_in_patch(box_coords, road_layers, mode='intersect')\n\n        # Go through all objects within the bounding box.\n        result = {layer: [] for layer in road_layers}\n        if rel_layer == 'road_segment':\n            # For road segments, we do not have a direction.\n            # Return objects that have ANY exterior points in common with the relevant layer.\n            rel_exterior_nodes = self.get(rel_layer, rel_token)['exterior_node_tokens']\n            for layer in road_layers:\n                for token in 
intersect[layer]:\n                    exterior_nodes = self.get(layer, token)['exterior_node_tokens']\n                    if any(n in exterior_nodes for n in rel_exterior_nodes) \\\n                            and token != rel_layers[layer]:\n                        result[layer].append(token)\n        else:\n            # For lanes and road blocks, the next road is indicated by the edge line.\n            # Return objects where ALL edge line nodes are included in the exterior nodes.\n            to_edge_line = self.get(rel_layer, rel_token)['to_edge_line_token']\n            to_edge_nodes = self.get('line', to_edge_line)['node_tokens']\n            for layer in road_layers:\n                for token in intersect[layer]:\n                    exterior_nodes = self.get(layer, token)['exterior_node_tokens']\n                    if all(n in exterior_nodes for n in to_edge_nodes) \\\n                            and token != rel_layers[layer]:\n                        result[layer].append(token)\n        return result\n\n\nclass NuScenesMapExplorer:\n    \"\"\" Helper class to explore the nuScenes map data. \"\"\"\n    def __init__(self,\n                 map_api: NuScenesMap,\n                 representative_layers: Tuple[str] = ('drivable_area', 'lane', 'walkway'),\n                 color_map: dict = None):\n        \"\"\"\n        :param map_api: NuScenesMap database class.\n        :param representative_layers: These are the layers that we feel are representative of the whole mapping data.\n        :param color_map: Color map.\n        \"\"\"\n        # Mutable default argument.\n        if color_map is None:\n            color_map = dict(drivable_area='#a6cee3',\n                             road_segment='#1f78b4',\n                             road_block='#b2df8a',\n                             lane='#33a02c',\n                             ped_crossing='#fb9a99',\n                             walkway='#e31a1c',\n                             stop_line='#fdbf6f',\n                             carpark_area='#ff7f00',\n                             road_divider='#cab2d6',\n                             lane_divider='#6a3d9a',\n                             traffic_light='#7e772e')\n\n        self.map_api = map_api\n        self.representative_layers = representative_layers\n        self.color_map = color_map\n\n        self.canvas_max_x = self.map_api.canvas_edge[0]\n        self.canvas_min_x = 0\n        self.canvas_max_y = self.map_api.canvas_edge[1]\n        self.canvas_min_y = 0\n        self.canvas_aspect_ratio = (self.canvas_max_x - self.canvas_min_x) / (self.canvas_max_y - self.canvas_min_y)\n\n    def render_centerlines(self,\n                           resolution_meters: float,\n                           figsize: Union[None, float, Tuple[float, float]] = None,\n                           bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Render the centerlines of all lanes and lane connectors.\n        :param resolution_meters: How finely to discretize the lane. 
Smaller values ensure curved\n            lanes are properly represented.\n        :param figsize: Size of the figure.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        \"\"\"\n        # Discretize all lanes and lane connectors.\n        pose_lists = self.map_api.discretize_centerlines(resolution_meters)\n\n        # Render connectivity lines.\n        fig = plt.figure(figsize=self._get_figsize(figsize))\n        ax = fig.add_axes([0, 0, 1, 1 / self.canvas_aspect_ratio])\n\n        if bitmap is not None:\n            bitmap.render(self.map_api.canvas_edge, ax)\n\n        for pose_list in pose_lists:\n            if len(pose_list) > 0:\n                plt.plot(pose_list[:, 0], pose_list[:, 1])\n\n        return fig, ax\n\n    def render_map_mask(self,\n                        patch_box: Tuple[float, float, float, float],\n                        patch_angle: float,\n                        layer_names: List[str],\n                        canvas_size: Tuple[int, int],\n                        figsize: Tuple[int, int],\n                        n_row: int = 2) -> Tuple[Figure, List[Axes]]:\n        \"\"\"\n        Render map mask of the patch specified by patch_box and patch_angle.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n        :param layer_names: A list of layer names to be extracted.\n        :param canvas_size: Size of the output mask (h, w).\n        :param figsize: Size of the figure.\n        :param n_row: Number of rows with plots.\n        :return: The matplotlib figure and a list of axes of the rendered layers.\n        \"\"\"\n        if layer_names is None:\n            layer_names = self.map_api.non_geometric_layers\n\n        map_mask = self.get_map_mask(patch_box, patch_angle, layer_names, canvas_size)\n\n        # If no canvas_size is specified, retrieve the default from the output of get_map_mask.\n        if canvas_size is None:\n            canvas_size = map_mask.shape[1:]\n\n        fig = plt.figure(figsize=figsize)\n        ax = fig.add_axes([0, 0, 1, 1])\n        ax.set_xlim(0, canvas_size[1])\n        ax.set_ylim(0, canvas_size[0])\n\n        n_col = len(map_mask) // n_row\n        gs = gridspec.GridSpec(n_row, n_col)\n        gs.update(wspace=0.025, hspace=0.05)\n        for i in range(len(map_mask)):\n            r = i // n_col\n            c = i - r * n_col\n            subax = plt.subplot(gs[r, c])\n            subax.imshow(map_mask[i], origin='lower')\n            subax.text(canvas_size[0] * 0.5, canvas_size[1] * 1.1, layer_names[i])\n            subax.grid(False)\n\n        return fig, fig.axes\n\n    def get_map_geom(self,\n                     patch_box: Tuple[float, float, float, float],\n                     patch_angle: float,\n                     layer_names: List[str]) -> List[Tuple[str, List[Geometry]]]:\n        \"\"\"\n        Returns a list of geometries in the specified patch_box.\n        These are unscaled, but aligned with the patch angle.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n                            North-facing corresponds to 0.\n        :param layer_names: A list of layer names to be extracted, or None for all non-geometric layers.\n        :return: List of layer names and their corresponding geometries.\n        \"\"\"\n        # If None, return all geometric layers.\n        if 
layer_names is None:\n            layer_names = self.map_api.non_geometric_layers\n\n        # Get each layer name and geometry and store them in a list.\n        map_geom = []\n        for layer_name in layer_names:\n            layer_geom = self._get_layer_geom(patch_box, patch_angle, layer_name)\n            if layer_geom is None:\n                continue\n            map_geom.append((layer_name, layer_geom))\n\n        return map_geom\n\n    def map_geom_to_mask(self,\n                         map_geom: List[Tuple[str, List[Geometry]]],\n                         local_box: Tuple[float, float, float, float],\n                         canvas_size: Tuple[int, int]) -> np.ndarray:\n        \"\"\"\n        Return list of map mask layers of the specified patch.\n        :param map_geom: List of layer names and their corresponding geometries.\n        :param local_box: The local patch box defined as (x_center, y_center, height, width), where typically\n            x_center = y_center = 0.\n        :param canvas_size: Size of the output mask (h, w).\n        :return: Stacked numpy array of size [c x h x w] with c channels and the same height/width as the canvas.\n        \"\"\"\n        # Get each layer mask and stack them into a numpy tensor.\n        map_mask = []\n        for layer_name, layer_geom in map_geom:\n            layer_mask = self._layer_geom_to_mask(layer_name, layer_geom, local_box, canvas_size)\n            if layer_mask is not None:\n                map_mask.append(layer_mask)\n\n        return np.array(map_mask)\n\n    def get_map_mask(self,\n                     patch_box: Optional[Tuple[float, float, float, float]],\n                     patch_angle: float,\n                     layer_names: List[str] = None,\n                     canvas_size: Tuple[int, int] = (100, 100)) -> np.ndarray:\n        \"\"\"\n        Return list of map mask layers of the specified patch.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width]. If None, this plots the entire map.\n        :param patch_angle: Patch orientation in degrees. North-facing corresponds to 0.\n        :param layer_names: A list of layer names to be extracted, or None for all non-geometric layers.\n        :param canvas_size: Size of the output mask (h, w). 
If None, we use the default resolution of 10px/m.\n        :return: Stacked numpy array of size [c x h x w] with c channels and the same width/height as the canvas.\n        \"\"\"\n        # For some combination of parameters, we need to know the size of the current map.\n        if self.map_api.map_name == 'singapore-onenorth':\n            map_dims = [1585.6, 2025.0]\n        elif self.map_api.map_name == 'singapore-hollandvillage':\n            map_dims = [2808.3, 2922.9]\n        elif self.map_api.map_name == 'singapore-queenstown':\n            map_dims = [3228.6, 3687.1]\n        elif self.map_api.map_name == 'boston-seaport':\n            map_dims = [2979.5, 2118.1]\n        else:\n            raise Exception('Error: Invalid map!')\n\n        # If None, return the entire map.\n        if patch_box is None:\n            patch_box = [map_dims[0] / 2, map_dims[1] / 2, map_dims[1], map_dims[0]]\n\n        # If None, return all non-geometric layers.\n        if layer_names is None:\n            layer_names = self.map_api.non_geometric_layers\n\n        # If None, return the specified patch in the original scale of 10px/m.\n        if canvas_size is None:\n            map_scale = 10\n            canvas_size = np.array((patch_box[2], patch_box[3])) * map_scale\n            canvas_size = tuple(np.round(canvas_size).astype(np.int32))\n\n        # Get geometry of each layer.\n        map_geom = self.get_map_geom(patch_box, patch_angle, layer_names)\n\n        # Convert geometry of each layer into mask and stack them into a numpy tensor.\n        # Convert the patch box from global coordinates to local coordinates by setting the center to (0, 0).\n        local_box = (0.0, 0.0, patch_box[2], patch_box[3])\n        map_mask = self.map_geom_to_mask(map_geom, local_box, canvas_size)\n        assert np.all(map_mask.shape[1:] == canvas_size)\n\n        return map_mask\n\n    def render_record(self,\n                      layer_name: str,\n                      token: str,\n                      alpha: float = 0.5,\n                      figsize: Union[None, float, Tuple[float, float]] = None,\n                      other_layers: List[str] = None,\n                      bitmap: Optional[BitMap] = None) -> Tuple[Figure, Tuple[Axes, Axes]]:\n        \"\"\"\n        Render a single map record.\n        By default will also render 3 layers which are `drivable_area`, `lane`, and `walkway` unless specified by\n        `other_layers`.\n        :param layer_name: Name of the layer that we are interested in.\n        :param token: Token of the record that you want to render.\n        :param alpha: The opacity of each layer that gets rendered.\n        :param figsize: Size of the whole figure.\n        :param other_layers: What other layers to render aside from the one specified in `layer_name`.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: The matplotlib figure and axes of the rendered layers.\n        \"\"\"\n        if other_layers is None:\n            other_layers = list(self.representative_layers)\n\n        for other_layer in other_layers:\n            if other_layer not in self.map_api.non_geometric_layers:\n                raise ValueError(\"{} is not a non geometric layer\".format(other_layer))\n\n        x1, y1, x2, y2 = self.map_api.get_bounds(layer_name, token)\n\n        local_width = x2 - x1\n        local_height = y2 - y1\n        assert local_height > 0, 'Error: Map has 0 height!'\n        local_aspect_ratio = local_width / local_height\n\n 
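       # Figure layout: the global map view fills the lower-left area with width 0.65 of the figure\n        # and height scaled by the canvas aspect ratio; the zoomed local view of the record sits beside\n        # it with width 0.34 and height scaled by the record's own aspect ratio.\n\n 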
       # We obtained the values 0.65 and 0.66 by trials.\n        fig = plt.figure(figsize=self._get_figsize(figsize))\n        global_ax = fig.add_axes([0, 0, 0.65, 0.65 / self.canvas_aspect_ratio])\n        local_ax = fig.add_axes([0.66, 0.66 / self.canvas_aspect_ratio, 0.34, 0.34 / local_aspect_ratio])\n\n        # To make sure the sequence of the layer overlays is always consistent after typesetting set().\n        random.seed('nutonomy')\n\n        if bitmap is not None:\n            bitmap.render(self.map_api.canvas_edge, global_ax)\n            bitmap.render(self.map_api.canvas_edge, local_ax)\n\n        layer_names = other_layers + [layer_name]\n        layer_names = list(set(layer_names))\n\n        for layer in layer_names:\n            self._render_layer(global_ax, layer, alpha)\n\n        for layer in layer_names:\n            self._render_layer(local_ax, layer, alpha)\n\n        if layer_name == 'drivable_area':\n            # Bad output aesthetically if we add spacing between the objects and the axes for drivable area.\n            local_ax_xlim = (x1, x2)\n            local_ax_ylim = (y1, y2)\n        else:\n            # Add some spacing between the object and the axes.\n            local_ax_xlim = (x1 - local_width / 3, x2 + local_width / 3)\n            local_ax_ylim = (y1 - local_height / 3, y2 + local_height / 3)\n\n            # Draws the rectangular patch on the local_ax.\n            local_ax.add_patch(Rectangle((x1, y1), local_width, local_height, linestyle='-.', color='red', fill=False,\n                                         lw=2))\n\n        local_ax.set_xlim(*local_ax_xlim)\n        local_ax.set_ylim(*local_ax_ylim)\n        local_ax.set_title('Local View')\n\n        global_ax.set_xlim(self.canvas_min_x, self.canvas_max_x)\n        global_ax.set_ylim(self.canvas_min_y, self.canvas_max_y)\n        global_ax.set_title('Global View')\n        global_ax.legend()\n\n        # Adds the zoomed in effect to the plot.\n        mark_inset(global_ax, local_ax, loc1=2, loc2=4)\n\n        return fig, (global_ax, local_ax)\n\n    def render_layers(self,\n                      layer_names: List[str],\n                      alpha: float,\n                      figsize: Union[None, float, Tuple[float, float]],\n                      tokens: List[str] = None,\n                      bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Render a list of layers.\n        :param layer_names: A list of layer names.\n        :param alpha: The opacity of each layer.\n        :param figsize: Size of the whole figure.\n        :param tokens: Optional list of tokens to render. 
None means all tokens are rendered.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: The matplotlib figure and axes of the rendered layers.\n        \"\"\"\n        fig = plt.figure(figsize=self._get_figsize(figsize))\n        ax = fig.add_axes([0, 0, 1, 1 / self.canvas_aspect_ratio])\n\n        ax.set_xlim(self.canvas_min_x, self.canvas_max_x)\n        ax.set_ylim(self.canvas_min_y, self.canvas_max_y)\n\n        if bitmap is not None:\n            bitmap.render(self.map_api.canvas_edge, ax)\n\n        layer_names = list(set(layer_names))\n        for layer_name in layer_names:\n            self._render_layer(ax, layer_name, alpha, tokens)\n\n        ax.legend()\n\n        return fig, ax\n\n    def render_map_patch(self,\n                         box_coords: Tuple[float, float, float, float],\n                         layer_names: List[str] = None,\n                         alpha: float = 0.5,\n                         figsize: Tuple[float, float] = (15, 15),\n                         render_egoposes_range: bool = True,\n                         render_legend: bool = True,\n                         bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Renders a rectangular patch specified by `box_coords`. By default renders all layers.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param layer_names: All the non geometric layers that we want to render.\n        :param alpha: The opacity of each layer.\n        :param figsize: Size of the whole figure.\n        :param render_egoposes_range: Whether to render a rectangle around all ego poses.\n        :param render_legend: Whether to render the legend of map layers.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: The matplotlib figure and axes of the rendered layers.\n        \"\"\"\n        x_min, y_min, x_max, y_max = box_coords\n\n        if layer_names is None:\n            layer_names = self.map_api.non_geometric_layers\n\n        fig = plt.figure(figsize=figsize)\n\n        local_width = x_max - x_min\n        local_height = y_max - y_min\n        assert local_height > 0, 'Error: Map patch has 0 height!'\n        local_aspect_ratio = local_width / local_height\n\n        ax = fig.add_axes([0, 0, 1, 1 / local_aspect_ratio])\n\n        if bitmap is not None:\n            bitmap.render(self.map_api.canvas_edge, ax)\n\n        for layer_name in layer_names:\n            self._render_layer(ax, layer_name, alpha)\n\n        x_margin = np.minimum(local_width / 4, 50)\n        y_margin = np.minimum(local_height / 4, 10)\n        ax.set_xlim(x_min - x_margin, x_max + x_margin)\n        ax.set_ylim(y_min - y_margin, y_max + y_margin)\n\n        if render_egoposes_range:\n            ax.add_patch(Rectangle((x_min, y_min), local_width, local_height, fill=False, linestyle='-.', color='red',\n                                   lw=2))\n            ax.text(x_min + local_width / 100, y_min + local_height / 2, \"%g m\" % local_height,\n                    fontsize=14, weight='bold')\n            ax.text(x_min + local_width / 2, y_min + local_height / 100, \"%g m\" % local_width,\n                    fontsize=14, weight='bold')\n\n        if render_legend:\n            ax.legend(frameon=True, loc='upper right')\n\n        return fig, ax\n\n    def render_map_in_image(self,\n                            nusc: NuScenes,\n                            sample_token: str,\n 
                           camera_channel: str = 'CAM_FRONT',\n                            alpha: float = 0.3,\n                            patch_radius: float = 10000,\n                            min_polygon_area: float = 1000,\n                            render_behind_cam: bool = True,\n                            render_outside_im: bool = True,\n                            layer_names: List[str] = None,\n                            verbose: bool = True,\n                            out_path: str = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Render a nuScenes camera image and overlay the polygons for the specified map layers.\n        Note that the projections are not always accurate as the localization is in 2d.\n        :param nusc: The NuScenes instance to load the image from.\n        :param sample_token: The image's corresponding sample_token.\n        :param camera_channel: Camera channel name, e.g. 'CAM_FRONT'.\n        :param alpha: The transparency value of the layers to render in [0, 1].\n        :param patch_radius: The radius in meters around the ego car in which to select map records.\n        :param min_polygon_area: Minimum area a polygon needs to have to be rendered.\n        :param render_behind_cam: Whether to render polygons where any point is behind the camera.\n        :param render_outside_im: Whether to render polygons where any point is outside the image.\n        :param layer_names: The names of the layers to render, e.g. ['lane'].\n            If set to None, the recommended setting will be used.\n        :param verbose: Whether to print to stdout.\n        :param out_path: Optional path to save the rendered figure to disk.\n        \"\"\"\n        near_plane = 1e-8\n\n        if verbose:\n            print('Warning: Note that the projections are not always accurate as the localization is in 2d.')\n\n        # Default layers.\n        if layer_names is None:\n            layer_names = ['road_segment', 'lane', 'ped_crossing', 'walkway', 'stop_line', 'carpark_area']\n\n        # Check layers whether we can render them.\n        for layer_name in layer_names:\n            assert layer_name in self.map_api.non_geometric_polygon_layers, \\\n                'Error: Can only render non-geometry polygons: %s' % layer_names\n\n        # Check that NuScenesMap was loaded for the correct location.\n        sample_record = nusc.get('sample', sample_token)\n        scene_record = nusc.get('scene', sample_record['scene_token'])\n        log_record = nusc.get('log', scene_record['log_token'])\n        log_location = log_record['location']\n        assert self.map_api.map_name == log_location, \\\n            'Error: NuScenesMap loaded for location %s, should be %s!' 
% (self.map_api.map_name, log_location)\n\n        # Grab the front camera image and intrinsics.\n        cam_token = sample_record['data'][camera_channel]\n        cam_record = nusc.get('sample_data', cam_token)\n        cam_path = nusc.get_sample_data_path(cam_token)\n        im = Image.open(cam_path)\n        im_size = im.size\n        cs_record = nusc.get('calibrated_sensor', cam_record['calibrated_sensor_token'])\n        cam_intrinsic = np.array(cs_record['camera_intrinsic'])\n\n        # Retrieve the current map.\n        poserecord = nusc.get('ego_pose', cam_record['ego_pose_token'])\n        ego_pose = poserecord['translation']\n        box_coords = (\n            ego_pose[0] - patch_radius,\n            ego_pose[1] - patch_radius,\n            ego_pose[0] + patch_radius,\n            ego_pose[1] + patch_radius,\n        )\n        records_in_patch = self.get_records_in_patch(box_coords, layer_names, 'intersect')\n\n        # Init axes.\n        fig = plt.figure(figsize=(9, 16))\n        ax = fig.add_axes([0, 0, 1, 1])\n        ax.set_xlim(0, im_size[0])\n        ax.set_ylim(0, im_size[1])\n        ax.imshow(im)\n\n        # Retrieve and render each record.\n        for layer_name in layer_names:\n            for token in records_in_patch[layer_name]:\n                record = self.map_api.get(layer_name, token)\n                if layer_name == 'drivable_area':\n                    polygon_tokens = record['polygon_tokens']\n                else:\n                    polygon_tokens = [record['polygon_token']]\n\n                for polygon_token in polygon_tokens:\n                    polygon = self.map_api.extract_polygon(polygon_token)\n\n                    # Convert polygon nodes to pointcloud with 0 height.\n                    points = np.array(polygon.exterior.xy)\n                    points = np.vstack((points, np.zeros((1, points.shape[1]))))\n\n                    # Transform into the ego vehicle frame for the timestamp of the image.\n                    points = points - np.array(poserecord['translation']).reshape((-1, 1))\n                    points = np.dot(Quaternion(poserecord['rotation']).rotation_matrix.T, points)\n\n                    # Transform into the camera.\n                    points = points - np.array(cs_record['translation']).reshape((-1, 1))\n                    points = np.dot(Quaternion(cs_record['rotation']).rotation_matrix.T, points)\n\n                    # Remove points that are partially behind the camera.\n                    depths = points[2, :]\n                    behind = depths < near_plane\n                    if np.all(behind):\n                        continue\n\n                    if render_behind_cam:\n                        # Perform clipping on polygons that are partially behind the camera.\n                        points = NuScenesMapExplorer._clip_points_behind_camera(points, near_plane)\n                    elif np.any(behind):\n                        # Otherwise ignore any polygon that is partially behind the camera.\n                        continue\n\n                    # Ignore polygons with less than 3 points after clipping.\n                    if len(points) == 0 or points.shape[1] < 3:\n                        continue\n\n                    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).\n                    points = view_points(points, cam_intrinsic, normalize=True)\n\n                    # Skip polygons where all points are outside the image.\n                    # Leave a 
margin of 1 pixel for aesthetic reasons.\n                    inside = np.ones(points.shape[1], dtype=bool)\n                    inside = np.logical_and(inside, points[0, :] > 1)\n                    inside = np.logical_and(inside, points[0, :] < im.size[0] - 1)\n                    inside = np.logical_and(inside, points[1, :] > 1)\n                    inside = np.logical_and(inside, points[1, :] < im.size[1] - 1)\n                    if render_outside_im:\n                        if np.all(np.logical_not(inside)):\n                            continue\n                    else:\n                        if np.any(np.logical_not(inside)):\n                            continue\n\n                    points = points[:2, :]\n                    points = [(p0, p1) for (p0, p1) in zip(points[0], points[1])]\n                    polygon_proj = Polygon(points)\n\n                    # Filter small polygons\n                    if polygon_proj.area < min_polygon_area:\n                        continue\n\n                    label = layer_name\n                    ax.add_patch(descartes.PolygonPatch(polygon_proj, fc=self.color_map[layer_name], alpha=alpha,\n                                                        label=label))\n\n        # Display the image.\n        plt.axis('off')\n        ax.invert_yaxis()\n\n        if out_path is not None:\n            plt.tight_layout()\n            plt.savefig(out_path, bbox_inches='tight', pad_inches=0)\n\n        return fig, ax\n\n    @staticmethod\n    def points_transform(points, poserecord, cs_record, cam_intrinsic, im_size, near_plane=1e-8,\n                         render_behind_cam=True, render_outside_im=True):\n        points = np.vstack((points, np.zeros((1, points.shape[1]))))\n\n        # Transform into the ego vehicle frame for the timestamp of the image.\n        points = points - np.array(poserecord['translation']).reshape((-1, 1))\n        points = np.dot(Quaternion(poserecord['rotation']).rotation_matrix.T, points)\n\n        # Transform into the camera.\n        points = points - np.array(cs_record['translation']).reshape((-1, 1))\n        points = np.dot(Quaternion(cs_record['rotation']).rotation_matrix.T, points)\n\n        # Remove points that are partially behind the camera.\n        depths = points[2, :]\n        behind = depths < near_plane\n        if np.all(behind):\n            return None\n\n        if render_behind_cam:\n            # Perform clipping on polygons that are partially behind the camera.\n            points = NuScenesMapExplorer._clip_points_behind_camera(points, near_plane)\n\n        elif np.any(behind):\n            # Otherwise ignore any polygon that is partially behind the camera.\n            return None\n\n        # Take the actual picture (matrix multiplication with camera-matrix + renormalization).\n        points = view_points(points, cam_intrinsic, normalize=True)\n\n        # Skip polygons where all points are outside the image.\n        # Leave a margin of 1 pixel for aesthetic reasons.\n        inside = np.ones(points.shape[1], dtype=bool)\n        inside = np.logical_and(inside, points[0, :] > 1)\n        inside = np.logical_and(inside, points[0, :] < im_size[0] - 1)\n        inside = np.logical_and(inside, points[1, :] > 1)\n        inside = np.logical_and(inside, points[1, :] < im_size[1] - 1)\n\n        if render_outside_im:\n            if np.all(np.logical_not(inside)):\n                return None\n        else:\n            if np.any(np.logical_not(inside)):\n                return None\n\n       
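 # Note: the vertex filter on the following line is commented out; enabling it would drop any\n        # projected vertices outside the image margin instead of keeping the polygon outline intact.\n       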
 # points = points[:, inside]\n\n        # Ignore polygons with less than 3 points after clipping.\n        if len(points) == 0 or points.shape[1] < 3:\n            return None\n\n        points = points[:2, :]\n        points = [(p0, p1) for (p0, p1) in zip(points[0], points[1])]\n        return points\n\n    def get_map_mask_in_image(self,\n                              nusc: NuScenes,\n                              sample_token: str,\n                              camera_channel: str = 'CAM_FRONT',\n                              alpha: float = 0.3,\n                              patch_radius: float = 10000,\n                              min_polygon_area: float = 1000,\n                              render_behind_cam: bool = True,\n                              render_outside_im: bool = True,\n                              layer_names: List[str] = None,\n                              verbose: bool = False,\n                              out_path: str = None) -> np.ndarray:\n        \"\"\"\n        Render a nuScenes camera image and overlay the polygons for the specified map layers.\n        Note that the projections are not always accurate as the localization is in 2d.\n        :param nusc: The NuScenes instance to load the image from.\n        :param sample_token: The image's corresponding sample_token.\n        :param camera_channel: Camera channel name, e.g. 'CAM_FRONT'.\n        :param alpha: The transparency value of the layers to render in [0, 1].\n        :param patch_radius: The radius in meters around the ego car in which to select map records.\n        :param min_polygon_area: Minimum area a polygon needs to have to be rendered.\n        :param render_behind_cam: Whether to render polygons where any point is behind the camera.\n        :param render_outside_im: Whether to render polygons where any point is outside the image.\n        :param layer_names: The names of the layers to render, e.g. ['lane'].\n            If set to None, the recommended setting will be used.\n        :param verbose: Whether to print to stdout.\n        :param out_path: Optional path to save the rendered figure to disk.\n        \"\"\"\n        near_plane = 1e-8\n        if verbose:\n            print('Warning: Note that the projections are not always accurate as the localization is in 2d.')\n\n        # Default layers.\n        if layer_names is None:\n            layer_names = ['road_segment', 'lane', 'ped_crossing', 'walkway', 'stop_line', 'carpark_area']\n\n        # # Check layers whether we can render them.\n        # for layer_name in layer_names:\n        #     assert layer_name in self.map_api.non_geometric_polygon_layers, \\\n        #         'Error: Can only render non-geometry polygons: %s' % layer_names\n\n        # Check that NuScenesMap was loaded for the correct location.\n        sample_record = nusc.get('sample', sample_token)\n        scene_record = nusc.get('scene', sample_record['scene_token'])\n        log_record = nusc.get('log', scene_record['log_token'])\n        log_location = log_record['location']\n        assert self.map_api.map_name == log_location, \\\n            'Error: NuScenesMap loaded for location %s, should be %s!' 
% (self.map_api.map_name, log_location)\n\n        # Grab the front camera image and intrinsics.\n        cam_token = sample_record['data'][camera_channel]\n        cam_record = nusc.get('sample_data', cam_token)\n        cam_path = nusc.get_sample_data_path(cam_token)\n        im = Image.open(cam_path)\n        im_size = im.size\n        cs_record = nusc.get('calibrated_sensor', cam_record['calibrated_sensor_token'])\n        cam_intrinsic = np.array(cs_record['camera_intrinsic'])\n\n        # Retrieve the current map.\n        poserecord = nusc.get('ego_pose', cam_record['ego_pose_token'])\n        ego_pose = poserecord['translation']\n        box_coords = (\n            ego_pose[0] - patch_radius,\n            ego_pose[1] - patch_radius,\n            ego_pose[0] + patch_radius,\n            ego_pose[1] + patch_radius,\n        )\n        records_in_patch = self.get_records_in_patch(box_coords, layer_names, 'intersect')\n\n        if out_path is not None:\n            # Init axes.\n            fig = plt.figure(figsize=(9, 16))\n            ax = fig.add_axes([0, 0, 1, 1])\n            ax.set_xlim(0, im_size[0])\n            ax.set_ylim(0, im_size[1])\n            ax.imshow(im)\n\n        points_transform = partial(self.points_transform, poserecord=poserecord, cs_record=cs_record,\n                                   cam_intrinsic=cam_intrinsic, near_plane=near_plane, im_size=im_size,\n                                   render_behind_cam=render_behind_cam, render_outside_im=render_outside_im)\n\n        # Retrieve and render each record.\n        map_geom = []\n        for layer_name in layer_names:\n            if layer_name in self.map_api.non_geometric_line_layers:\n                line_list = []\n                for token in records_in_patch[layer_name]:\n                    record = self.map_api.get(layer_name, token)\n                    line = self.map_api.extract_line(record['line_token'])\n                    if line.is_empty:  # Skip lines without nodes.\n                        continue\n                    points = np.array(line.xy)\n                    points = points_transform(points)\n                    if points is None:\n                        continue\n                    line = LineString(points)\n                    line_list.append(line)\n                    # For visualize\n                    if out_path is not None:\n                        polygon = Polygon(points)\n                        ax.add_patch(descartes.PolygonPatch(polygon, fc=self.color_map[layer_name],\n                                                            alpha=alpha, label=layer_name))\n                map_geom.append((layer_name, line_list))\n            elif layer_name == 'drivable_area':\n                polygon_list = []\n                for token in records_in_patch[layer_name]:\n                    record = self.map_api.get(layer_name, token)\n                    polygons = [self.map_api.extract_polygon(polygon_token) for polygon_token in\n                                record['polygon_tokens']]\n                    for polygon in polygons:\n                        ex_points = np.array(polygon.exterior.xy)\n                        ex_points = points_transform(ex_points)\n                        if ex_points is None:\n                            continue\n                        interiors = []\n                        for interior in polygon.interiors:\n                            in_points = np.array(interior.xy)\n                            in_points = points_transform(in_points)\n      
                      if in_points is None:\n                                continue\n                            interiors.append(in_points)\n                        polygon = Polygon(ex_points, interiors)\n                        polygon = polygon.buffer(0.01)\n                        if polygon.geom_type == 'Polygon':\n                            polygon = MultiPolygon([polygon])\n                        # Filter small polygons\n                        if polygon.area < min_polygon_area:\n                            continue\n                        polygon_list.append(polygon)\n                        # For visualize\n                        if out_path is not None:\n                            ax.add_patch(descartes.PolygonPatch(polygon, fc=self.color_map[layer_name],\n                                                                alpha=alpha, label=layer_name))\n                map_geom.append((layer_name, polygon_list))\n            else:\n                polygon_list = []\n                for token in records_in_patch[layer_name]:\n                    record = self.map_api.get(layer_name, token)\n                    polygon = self.map_api.extract_polygon(record['polygon_token'])\n                    if polygon.is_valid:\n                        if not polygon.is_empty:\n                            ex_points = np.array(polygon.exterior.xy)\n                            ex_points = points_transform(ex_points)\n                            if ex_points is None:\n                                continue\n                            interiors = []\n                            for interior in polygon.interiors:\n                                in_points = np.array(interior.xy)\n                                in_points = points_transform(in_points)\n                                if in_points is None:\n                                    continue\n                                interiors.append(in_points)\n                            polygon = Polygon(ex_points, interiors)\n                            polygon = polygon.buffer(0.01)\n                            if polygon.geom_type == 'Polygon':\n                                polygon = MultiPolygon([polygon])\n                            # Filter small polygons\n                            if polygon.area < min_polygon_area:\n                                continue\n                            polygon_list.append(polygon)\n                            # For visualize\n                            if out_path is not None:\n                                ax.add_patch(descartes.PolygonPatch(polygon, fc=self.color_map[layer_name],\n                                                                    alpha=alpha, label=layer_name))\n                map_geom.append((layer_name, polygon_list))\n\n        # For visualize\n        if out_path is not None:\n            # Display the image.\n            plt.axis('off')\n            ax.invert_yaxis()\n            plt.tight_layout()\n            plt.savefig(out_path, bbox_inches='tight', pad_inches=0)\n            plt.close()\n\n        # Convert geometry of each layer into mask and stack them into a numpy tensor.\n        # Convert the patch box from global coordinates to local coordinates by setting the center to (0, 0).\n        local_box = (im_size[0] // 2, im_size[1] // 2, im_size[1], im_size[0])\n        canvas_size = (im_size[1], im_size[0])\n        img_mask = self.map_geom_to_mask(map_geom, local_box, canvas_size)\n        assert np.all(img_mask.shape[1:] == canvas_size)\n        return 
img_mask\n\n    def render_egoposes_on_fancy_map(self,\n                                     nusc: NuScenes,\n                                     scene_tokens: List = None,\n                                     verbose: bool = True,\n                                     out_path: str = None,\n                                     render_egoposes: bool = True,\n                                     render_egoposes_range: bool = True,\n                                     render_legend: bool = True,\n                                     bitmap: Optional[BitMap] = None) -> Tuple[np.ndarray, Figure, Axes]:\n        \"\"\"\n        Renders each ego pose of a list of scenes on the map (around 40 poses per scene).\n        This method is heavily inspired by NuScenes.render_egoposes_on_map(), but uses the map expansion pack maps.\n        Note that the maps are constantly evolving, whereas we only released a single snapshot of the data.\n        Therefore for some scenes there is a bad fit between ego poses and maps.\n        :param nusc: The NuScenes instance to load the ego poses from.\n        :param scene_tokens: Optional list of scene tokens corresponding to the current map location.\n        :param verbose: Whether to show status messages and progress bar.\n        :param out_path: Optional path to save the rendered figure to disk.\n        :param render_egoposes: Whether to render ego poses.\n        :param render_egoposes_range: Whether to render a rectangle around all ego poses.\n        :param render_legend: Whether to render the legend of map layers.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        :return: <np.float32: n, 2>. Returns a matrix with n ego poses in global map coordinates.\n        \"\"\"\n        # Settings\n        patch_margin = 2\n        min_diff_patch = 30\n\n        # Ids of scenes with a bad match between localization and map.\n        scene_blacklist = [499, 515, 517]\n\n        # Get logs by location.\n        log_location = self.map_api.map_name\n        log_tokens = [log['token'] for log in nusc.log if log['location'] == log_location]\n        assert len(log_tokens) > 0, 'Error: This split has 0 scenes for location %s!' % log_location\n\n        # Filter scenes.\n        scene_tokens_location = [e['token'] for e in nusc.scene if e['log_token'] in log_tokens]\n        if scene_tokens is not None:\n            scene_tokens_location = [t for t in scene_tokens_location if t in scene_tokens]\n        assert len(scene_tokens_location) > 0, 'Error: Found 0 valid scenes for location %s!' % log_location\n\n        map_poses = []\n        if verbose:\n            print('Adding ego poses to map...')\n        for scene_token in tqdm(scene_tokens_location, disable=not verbose):\n            # Check that the scene is from the correct location.\n            scene_record = nusc.get('scene', scene_token)\n            scene_name = scene_record['name']\n            scene_id = int(scene_name.replace('scene-', ''))\n            log_record = nusc.get('log', scene_record['log_token'])\n            assert log_record['location'] == log_location, \\\n                'Error: The provided scene_tokens do not correspond to the provided map location!'\n\n            # Print a warning if the localization is known to be bad.\n            if verbose and scene_id in scene_blacklist:\n                print('Warning: %s is known to have a bad fit between ego pose and map.' 
% scene_name)\n\n            # For each sample in the scene, store the ego pose.\n            sample_tokens = nusc.field2token('sample', 'scene_token', scene_token)\n            for sample_token in sample_tokens:\n                sample_record = nusc.get('sample', sample_token)\n\n                # Poses are associated with the sample_data. Here we use the lidar sample_data.\n                sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])\n                pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])\n\n                # Calculate the pose on the map and append.\n                map_poses.append(pose_record['translation'])\n\n        # Check that ego poses aren't empty.\n        assert len(map_poses) > 0, 'Error: Found 0 ego poses. Please check the inputs.'\n\n        # Compute number of close ego poses.\n        if verbose:\n            print('Creating plot...')\n        map_poses = np.vstack(map_poses)[:, :2]\n\n        # Render the map patch with the current ego poses.\n        min_patch = np.floor(map_poses.min(axis=0) - patch_margin)\n        max_patch = np.ceil(map_poses.max(axis=0) + patch_margin)\n        diff_patch = max_patch - min_patch\n        if any(diff_patch < min_diff_patch):\n            center_patch = (min_patch + max_patch) / 2\n            diff_patch = np.maximum(diff_patch, min_diff_patch)\n            min_patch = center_patch - diff_patch / 2\n            max_patch = center_patch + diff_patch / 2\n        my_patch = (min_patch[0], min_patch[1], max_patch[0], max_patch[1])\n        fig, ax = self.render_map_patch(my_patch, self.map_api.non_geometric_layers, figsize=(10, 10),\n                                        render_egoposes_range=render_egoposes_range,\n                                        render_legend=render_legend, bitmap=bitmap)\n\n        # Plot in the same axis as the map.\n        # Make sure these are plotted \"on top\".\n        if render_egoposes:\n            ax.scatter(map_poses[:, 0], map_poses[:, 1], s=20, c='k', alpha=1.0, zorder=2)\n        plt.axis('off')\n\n        if out_path is not None:\n            plt.savefig(out_path, bbox_inches='tight', pad_inches=0)\n\n        return map_poses, fig, ax\n\n    def render_next_roads(self,\n                          x: float,\n                          y: float,\n                          alpha: float = 0.5,\n                          figsize: Union[None, float, Tuple[float, float]] = None,\n                          bitmap: Optional[BitMap] = None) -> Tuple[Figure, Axes]:\n        \"\"\"\n        Renders the possible next roads from a point of interest.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :param alpha: The opacity of each layer that gets rendered.\n        :param figsize: Size of the whole figure.\n        :param bitmap: Optional BitMap object to render below the other map layers.\n        \"\"\"\n        # Get next roads.\n        next_roads = self.map_api.get_next_roads(x, y)\n        layer_names = []\n        tokens = []\n        for layer_name, layer_tokens in next_roads.items():\n            if len(layer_tokens) > 0:\n                layer_names.append(layer_name)\n                tokens.extend(layer_tokens)\n\n        # Render them.\n        fig, ax = self.render_layers(layer_names, alpha, figsize, tokens=tokens, bitmap=bitmap)\n\n        # Render current location with an x.\n        ax.plot(x, y, 'x', markersize=12, color='red')\n\n        return 
fig, ax\n\n    @staticmethod\n    def _clip_points_behind_camera(points, near_plane: float):\n        \"\"\"\n        Perform clipping on polygons that are partially behind the camera.\n        This method is necessary as the projection does not work for points behind the camera.\n        Hence we compute the line between the point and the camera and follow that line until we hit the near plane of\n        the camera. Then we use that point.\n        :param points: <np.float32: 3, n> Matrix of points, where each point (x, y, z) is along each column.\n        :param near_plane: If we set the near_plane distance of the camera to 0 then some points will project to\n            infinity. Therefore we need to clip these points at the near plane.\n        :return: The clipped version of the polygon. This may have fewer points than the original polygon if some lines\n            were entirely behind the camera.\n        \"\"\"\n        points_clipped = []\n        # Loop through each line on the polygon.\n        # For each line where exactly 1 endpoint is behind the camera, move the point along the line until\n        # it hits the near plane of the camera (clipping).\n        assert points.shape[0] == 3\n        point_count = points.shape[1]\n        for line_1 in range(point_count):\n            line_2 = (line_1 + 1) % point_count\n            point_1 = points[:, line_1]\n            point_2 = points[:, line_2]\n            z_1 = point_1[2]\n            z_2 = point_2[2]\n\n            if z_1 >= near_plane and z_2 >= near_plane:\n                # Both points are in front.\n                # Add both points unless the first is already added.\n                if len(points_clipped) == 0 or all(points_clipped[-1] != point_1):\n                    points_clipped.append(point_1)\n                points_clipped.append(point_2)\n            elif z_1 < near_plane and z_2 < near_plane:\n                # Both points are behind the camera.\n                # Don't add anything.\n                continue\n            else:\n                # One point is in front, one behind.\n                # By convention point_a is behind the camera and point_b is in front.\n                if z_1 <= z_2:\n                    point_a = points[:, line_1]\n                    point_b = points[:, line_2]\n                else:\n                    point_a = points[:, line_2]\n                    point_b = points[:, line_1]\n                z_a = point_a[2]\n                z_b = point_b[2]\n\n                # Clip line along near plane.\n                pointdiff = point_b - point_a\n                alpha = (near_plane - z_b) / (z_a - z_b)\n                clipped = point_a + (1 - alpha) * pointdiff\n                assert np.abs(clipped[2] - near_plane) < 1e-6\n\n                # Add the first point (if valid and not duplicate), the clipped point and the second point (if valid).\n                if z_1 >= near_plane and (len(points_clipped) == 0 or all(points_clipped[-1] != point_1)):\n                    points_clipped.append(point_1)\n                points_clipped.append(clipped)\n                if z_2 >= near_plane:\n                    points_clipped.append(point_2)\n\n        points_clipped = np.array(points_clipped).transpose()\n        return points_clipped\n\n    def get_records_in_patch(self,\n                             box_coords: Tuple[float, float, float, float],\n                             layer_names: List[str] = None,\n                             mode: str = 'intersect') -> Dict[str, List[str]]:\n        
\"\"\"\n        Get all the record token that intersects or within a particular rectangular patch.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param layer_names: Names of the layers that we want to retrieve in a particular patch.\n            By default will always look for all non geometric layers.\n        :param mode: \"intersect\" will return all non geometric records that intersects the patch,\n            \"within\" will return all non geometric records that are within the patch.\n        :return: Dictionary of layer_name - tokens pairs.\n        \"\"\"\n        if mode not in ['intersect', 'within']:\n            raise ValueError(\"Mode {} is not valid, choice=('intersect', 'within')\".format(mode))\n\n        if layer_names is None:\n            layer_names = self.map_api.non_geometric_layers\n\n        records_in_patch = dict()\n        for layer_name in layer_names:\n            layer_records = []\n            for record in getattr(self.map_api, layer_name):\n                token = record['token']\n                if self.is_record_in_patch(layer_name, token, box_coords, mode):\n                    layer_records.append(token)\n\n            records_in_patch.update({layer_name: layer_records})\n\n        return records_in_patch\n\n    def is_record_in_patch(self,\n                           layer_name: str,\n                           token: str,\n                           box_coords: Tuple[float, float, float, float],\n                           mode: str = 'intersect') -> bool:\n        \"\"\"\n        Query whether a particular record is in a rectangular patch.\n        :param layer_name: The layer name of the record.\n        :param token: The record token.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param mode: \"intersect\" means it will return True if the geometric object intersects the patch and False\n        otherwise, \"within\" will return True if the geometric object is within the patch and False otherwise.\n        :return: Boolean value on whether a particular record intersects or is within a particular patch.\n        \"\"\"\n        if mode not in ['intersect', 'within']:\n            raise ValueError(\"Mode {} is not valid, choice=('intersect', 'within')\".format(mode))\n\n        if layer_name in self.map_api.lookup_polygon_layers:\n            return self._is_polygon_record_in_patch(token, layer_name, box_coords, mode)\n        elif layer_name in self.map_api.non_geometric_line_layers:\n            return self._is_line_record_in_patch(token, layer_name, box_coords,  mode)\n        else:\n            raise ValueError(\"{} is not a valid layer\".format(layer_name))\n\n    def layers_on_point(self, x: float, y: float, layer_names: List[str] = None) -> Dict[str, str]:\n        \"\"\"\n        Returns all the polygonal layers that a particular point is on.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :param layer_names: The names of the layers to search for.\n        :return: All the polygonal layers that a particular point is on.\n        \"\"\"\n        # Default option.\n        if layer_names is None:\n            layer_names = self.map_api.non_geometric_polygon_layers\n\n        layers_on_point = dict()\n        for layer_name in layer_names:\n            layers_on_point.update({layer_name: self.record_on_point(x, y, layer_name)})\n\n        return 
layers_on_point\n\n    def record_on_point(self, x: float, y: float, layer_name: str) -> str:\n        \"\"\"\n        Query what record of a layer a particular point is on.\n        :param x: x coordinate of the point of interest.\n        :param y: y coordinate of the point of interest.\n        :param layer_name: The non geometric polygonal layer name that we are interested in.\n        :return: The first token of a layer a particular point is on or '' if no layer is found.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_polygon_layers:\n            raise ValueError(\"{} is not a polygon layer\".format(layer_name))\n\n        point = Point(x, y)\n        records = getattr(self.map_api, layer_name)\n\n        if layer_name == 'drivable_area':\n            for record in records:\n                polygons = [self.map_api.extract_polygon(polygon_token) for polygon_token in record['polygon_tokens']]\n                for polygon in polygons:\n                    if point.within(polygon):\n                        return record['token']\n                    else:\n                        pass\n        else:\n            for record in records:\n                polygon = self.map_api.extract_polygon(record['polygon_token'])\n                if point.within(polygon):\n                    return record['token']\n                else:\n                    pass\n\n        # If nothing is found, return an empty string.\n        return ''\n\n    def extract_polygon(self, polygon_token: str) -> Polygon:\n        \"\"\"\n        Construct a shapely Polygon object out of a polygon token.\n        :param polygon_token: The token of the polygon record.\n        :return: The polygon wrapped in a shapely Polygon object.\n        \"\"\"\n        polygon_record = self.map_api.get('polygon', polygon_token)\n\n        exterior_coords = [(self.map_api.get('node', token)['x'], self.map_api.get('node', token)['y'])\n                           for token in polygon_record['exterior_node_tokens']]\n\n        interiors = []\n        for hole in polygon_record['holes']:\n            interior_coords = [(self.map_api.get('node', token)['x'], self.map_api.get('node', token)['y'])\n                               for token in hole['node_tokens']]\n            if len(interior_coords) > 0:  # Add only non-empty holes.\n                interiors.append(interior_coords)\n\n        return Polygon(exterior_coords, interiors)\n\n    def extract_line(self, line_token: str) -> LineString:\n        \"\"\"\n        Construct a shapely LineString object out of a line token.\n        :param line_token: The token of the line record.\n        :return: The line wrapped in a LineString object.\n        \"\"\"\n        line_record = self.map_api.get('line', line_token)\n        line_nodes = [(self.map_api.get('node', token)['x'], self.map_api.get('node', token)['y'])\n                      for token in line_record['node_tokens']]\n\n        return LineString(line_nodes)\n\n    def get_bounds(self, layer_name: str, token: str) -> Tuple[float, float, float, float]:\n        \"\"\"\n        Get the bounds of the geometric object that corresponds to a non geometric record.\n        :param layer_name: Name of the layer that we are interested in.\n        :param token: Token of the record.\n        :return: min_x, min_y, max_x, max_y of the line representation.\n        \"\"\"\n        if layer_name in self.map_api.non_geometric_polygon_layers:\n            return self._get_polygon_bounds(layer_name, token)\n        elif 
layer_name in self.map_api.non_geometric_line_layers:\n            return self._get_line_bounds(layer_name, token)\n        else:\n            raise ValueError(\"{} is not a valid layer\".format(layer_name))\n\n    def _get_polygon_bounds(self, layer_name: str, token: str) -> Tuple[float, float, float, float]:\n        \"\"\"\n        Get the extremities of the polygon object that corresponds to a non geometric record.\n        :param layer_name: Name of the layer that we are interested in.\n        :param token: Token of the record.\n        :return: min_x, min_y, max_x, max_y of of the polygon or polygons (for drivable_area) representation.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_polygon_layers:\n            raise ValueError(\"{} is not a record with polygon representation\".format(token))\n\n        record = self.map_api.get(layer_name, token)\n\n        if layer_name == 'drivable_area':\n            polygons = [self.map_api.get('polygon', polygon_token) for polygon_token in record['polygon_tokens']]\n            exterior_node_coords = []\n\n            for polygon in polygons:\n                nodes = [self.map_api.get('node', node_token) for node_token in polygon['exterior_node_tokens']]\n                node_coords = [(node['x'], node['y']) for node in nodes]\n                exterior_node_coords.extend(node_coords)\n\n            exterior_node_coords = np.array(exterior_node_coords)\n        else:\n            exterior_nodes = [self.map_api.get('node', token) for token in record['exterior_node_tokens']]\n            exterior_node_coords = np.array([(node['x'], node['y']) for node in exterior_nodes])\n\n        xs = exterior_node_coords[:, 0]\n        ys = exterior_node_coords[:, 1]\n\n        x2 = xs.max()\n        x1 = xs.min()\n        y2 = ys.max()\n        y1 = ys.min()\n\n        return x1, y1, x2, y2\n\n    def _get_line_bounds(self, layer_name: str, token: str) -> Tuple[float, float, float, float]:\n        \"\"\"\n        Get the bounds of the line object that corresponds to a non geometric record.\n        :param layer_name: Name of the layer that we are interested in.\n        :param token: Token of the record.\n        :return: min_x, min_y, max_x, max_y of of the line representation.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a record with line representation\".format(token))\n\n        record = self.map_api.get(layer_name, token)\n        nodes = [self.map_api.get('node', node_token) for node_token in record['node_tokens']]\n        node_coords = [(node['x'], node['y']) for node in nodes]\n        node_coords = np.array(node_coords)\n\n        xs = node_coords[:, 0]\n        ys = node_coords[:, 1]\n\n        x2 = xs.max()\n        x1 = xs.min()\n        y2 = ys.max()\n        y1 = ys.min()\n\n        return x1, y1, x2, y2\n\n    def _is_polygon_record_in_patch(self,\n                                    token: str,\n                                    layer_name: str,\n                                    box_coords: Tuple[float, float, float, float],\n                                    mode: str = 'intersect') -> bool:\n        \"\"\"\n        Query whether a particular polygon record is in a rectangular patch.\n        :param layer_name: The layer name of the record.\n        :param token: The record token.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param mode: \"intersect\" means it will return 
True if the geometric object intersects the patch and False\n        otherwise, \"within\" will return True if the geometric object is within the patch and False otherwise.\n        :return: Boolean value on whether a particular polygon record intersects or is within a particular patch.\n        \"\"\"\n        if layer_name not in self.map_api.lookup_polygon_layers:\n            raise ValueError('{} is not a polygonal layer'.format(layer_name))\n\n        x_min, y_min, x_max, y_max = box_coords\n        record = self.map_api.get(layer_name, token)\n        rectangular_patch = box(x_min, y_min, x_max, y_max)\n\n        if layer_name == 'drivable_area':\n            polygons = [self.map_api.extract_polygon(polygon_token) for polygon_token in record['polygon_tokens']]\n            geom = MultiPolygon(polygons)\n        else:\n            geom = self.map_api.extract_polygon(record['polygon_token'])\n\n        if mode == 'intersect':\n            return geom.intersects(rectangular_patch)\n        elif mode == 'within':\n            return geom.within(rectangular_patch)\n\n    def _is_line_record_in_patch(self,\n                                 token: str,\n                                 layer_name: str,\n                                 box_coords: Tuple[float, float, float, float],\n                                 mode: str = 'intersect') -> bool:\n        \"\"\"\n        Query whether a particular line record is in a rectangular patch.\n        :param layer_name: The layer name of the record.\n        :param token: The record token.\n        :param box_coords: The rectangular patch coordinates (x_min, y_min, x_max, y_max).\n        :param mode: \"intersect\" means it will return True if the geometric object intersects the patch and False\n        otherwise, \"within\" will return True if the geometric object is within the patch and False otherwise.\n        :return: Boolean value on whether a particular line  record intersects or is within a particular patch.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a line layer\".format(layer_name))\n\n        # Retrieve nodes of this line.\n        record = self.map_api.get(layer_name, token)\n        node_recs = [self.map_api.get('node', node_token) for node_token in record['node_tokens']]\n        node_coords = [[node['x'], node['y']] for node in node_recs]\n        node_coords = np.array(node_coords)\n\n        # A few lines in Queenstown have zero nodes. In this case we return False.\n        if len(node_coords) == 0:\n            return False\n\n        # Check that nodes fall inside the path.\n        x_min, y_min, x_max, y_max = box_coords\n        cond_x = np.logical_and(node_coords[:, 0] < x_max, node_coords[:, 0] > x_min)\n        cond_y = np.logical_and(node_coords[:, 1] < y_max, node_coords[:, 1] > y_min)\n        cond = np.logical_and(cond_x, cond_y)\n        if mode == 'intersect':\n            return np.any(cond)\n        elif mode == 'within':\n            return np.all(cond)\n\n    def _render_layer(self, ax: Axes, layer_name: str, alpha: float, tokens: List[str] = None) -> None:\n        \"\"\"\n        Wrapper method that renders individual layers on an axis.\n        :param ax: The matplotlib axes where the layer will get rendered.\n        :param layer_name: Name of the layer that we are interested in.\n        :param alpha: The opacity of the layer to be rendered.\n        :param tokens: Optional list of tokens to render. 
None means all tokens are rendered.\n        \"\"\"\n        if layer_name in self.map_api.non_geometric_polygon_layers:\n            self._render_polygon_layer(ax, layer_name, alpha, tokens)\n        elif layer_name in self.map_api.non_geometric_line_layers:\n            self._render_line_layer(ax, layer_name, alpha, tokens)\n        else:\n            raise ValueError(\"{} is not a valid layer\".format(layer_name))\n\n    def _render_polygon_layer(self, ax: Axes, layer_name: str, alpha: float, tokens: List[str] = None) -> None:\n        \"\"\"\n        Renders an individual non-geometric polygon layer on an axis.\n        :param ax: The matplotlib axes where the layer will get rendered.\n        :param layer_name: Name of the layer that we are interested in.\n        :param alpha: The opacity of the layer to be rendered.\n        :param tokens: Optional list of tokens to render. None means all tokens are rendered.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_polygon_layers:\n            raise ValueError('{} is not a polygonal layer'.format(layer_name))\n\n        first_time = True\n        records = getattr(self.map_api, layer_name)\n        if tokens is not None:\n            records = [r for r in records if r['token'] in tokens]\n        if layer_name == 'drivable_area':\n            for record in records:\n                polygons = [self.map_api.extract_polygon(polygon_token) for polygon_token in record['polygon_tokens']]\n\n                for polygon in polygons:\n                    if first_time:\n                        label = layer_name\n                        first_time = False\n                    else:\n                        label = None\n                    ax.add_patch(descartes.PolygonPatch(polygon, fc=self.color_map[layer_name], alpha=alpha,\n                                                        label=label))\n        else:\n            for record in records:\n                polygon = self.map_api.extract_polygon(record['polygon_token'])\n\n                if first_time:\n                    label = layer_name\n                    first_time = False\n                else:\n                    label = None\n\n                ax.add_patch(descartes.PolygonPatch(polygon, fc=self.color_map[layer_name], alpha=alpha,\n                                                    label=label))\n\n    def _render_line_layer(self, ax: Axes, layer_name: str, alpha: float, tokens: List[str] = None) -> None:\n        \"\"\"\n        Renders an individual non-geometric line layer on an axis.\n        :param ax: The matplotlib axes where the layer will get rendered.\n        :param layer_name: Name of the layer that we are interested in.\n        :param alpha: The opacity of the layer to be rendered.\n        :param tokens: Optional list of tokens to render. 
None means all tokens are rendered.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a line layer\".format(layer_name))\n\n        first_time = True\n        records = getattr(self.map_api, layer_name)\n        if tokens is not None:\n            records = [r for r in records if r['token'] in tokens]\n        for record in records:\n            if first_time:\n                label = layer_name\n                first_time = False\n            else:\n                label = None\n            line = self.map_api.extract_line(record['line_token'])\n            if line.is_empty:  # Skip lines without nodes\n                continue\n            xs, ys = line.xy\n\n            if layer_name == 'traffic_light':\n                # Draws an arrow with the physical traffic light as the starting point, pointing to the direction on\n                # where the traffic light points.\n                ax.add_patch(Arrow(xs[0], ys[0], xs[1]-xs[0], ys[1]-ys[0], color=self.color_map[layer_name],\n                                   label=label))\n            else:\n                ax.plot(xs, ys, color=self.color_map[layer_name], alpha=alpha, label=label)\n\n    def _get_layer_geom(self,\n                        patch_box: Tuple[float, float, float, float],\n                        patch_angle: float,\n                        layer_name: str) -> List[Geometry]:\n        \"\"\"\n        Wrapper method that gets the geometries for each layer.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n        :param layer_name: Name of map layer to be converted to binary map mask patch.\n        :return: List of geometries for the given layer.\n        \"\"\"\n        if layer_name in self.map_api.non_geometric_polygon_layers:\n            return self._get_layer_polygon(patch_box, patch_angle, layer_name)\n        elif layer_name in self.map_api.non_geometric_line_layers:\n            return self._get_layer_line(patch_box, patch_angle, layer_name)\n        else:\n            raise ValueError(\"{} is not a valid layer\".format(layer_name))\n\n    def _layer_geom_to_mask(self,\n                            layer_name: str,\n                            layer_geom: List[Geometry],\n                            local_box: Tuple[float, float, float, float],\n                            canvas_size: Tuple[int, int]) -> np.ndarray:\n        \"\"\"\n        Wrapper method that gets the mask for each layer's geometries.\n        :param layer_name: The name of the layer for which we get the masks.\n        :param layer_geom: List of the geometries of the layer specified in layer_name.\n        :param local_box: The local patch box defined as (x_center, y_center, height, width), where typically\n            x_center = y_center = 0.\n        :param canvas_size: Size of the output mask (h, w).\n        \"\"\"\n        if layer_name in self.map_api.non_geometric_polygon_layers:\n            return self._polygon_geom_to_mask(layer_geom, local_box, layer_name, canvas_size)\n        elif layer_name in self.map_api.non_geometric_line_layers:\n            return self._line_geom_to_mask(layer_geom, local_box, layer_name, canvas_size)\n        else:\n            raise ValueError(\"{} is not a valid layer\".format(layer_name))\n\n    @staticmethod\n    def mask_for_polygons(polygons: MultiPolygon, mask: np.ndarray) -> np.ndarray:\n        \"\"\"\n        Convert a polygon 
or multipolygon list to an image mask ndarray.\n        :param polygons: List of Shapely polygons to be converted to numpy array.\n        :param mask: Canvas where mask will be generated.\n        :return: Numpy ndarray polygon mask.\n        \"\"\"\n        if not polygons:\n            return mask\n\n        def int_coords(x):\n            # function to round and convert to int\n            return np.array(x).round().astype(np.int32)\n        exteriors = [int_coords(poly.exterior.coords) for poly in polygons]\n        interiors = [int_coords(pi.coords) for poly in polygons for pi in poly.interiors]\n        cv2.fillPoly(mask, exteriors, 1)\n        cv2.fillPoly(mask, interiors, 0)\n        return mask\n\n    @staticmethod\n    def mask_for_lines(lines: LineString, mask: np.ndarray) -> np.ndarray:\n        \"\"\"\n        Convert a Shapely LineString back to an image mask ndarray.\n        :param lines: List of shapely LineStrings to be converted to a numpy array.\n        :param mask: Canvas where mask will be generated.\n        :return: Numpy ndarray line mask.\n        \"\"\"\n        if lines.geom_type == 'MultiLineString':\n            for line in lines:\n                coords = np.asarray(list(line.coords), np.int32)\n                coords = coords.reshape((-1, 2))\n                cv2.polylines(mask, [coords], False, 1, 2)\n        else:\n            coords = np.asarray(list(lines.coords), np.int32)\n            coords = coords.reshape((-1, 2))\n            cv2.polylines(mask, [coords], False, 1, 2)\n\n        return mask\n\n    def _polygon_geom_to_mask(self,\n                              layer_geom: List[Polygon],\n                              local_box: Tuple[float, float, float, float],\n                              layer_name: str,\n                              canvas_size: Tuple[int, int]) -> np.ndarray:\n        \"\"\"\n        Convert polygon inside patch to binary mask and return the map patch.\n        :param layer_geom: list of polygons for each map layer\n        :param local_box: The local patch box defined as (x_center, y_center, height, width), where typically\n            x_center = y_center = 0.\n        :param layer_name: name of map layer to be converted to binary map mask patch.\n        :param canvas_size: Size of the output mask (h, w).\n        :return: Binary map mask patch with the size canvas_size.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_polygon_layers:\n            raise ValueError('{} is not a polygonal layer'.format(layer_name))\n\n        patch_x, patch_y, patch_h, patch_w = local_box\n\n        patch = self.get_patch_coord(local_box)\n\n        canvas_h = canvas_size[0]\n        canvas_w = canvas_size[1]\n\n        scale_height = canvas_h / patch_h\n        scale_width = canvas_w / patch_w\n\n        trans_x = -patch_x + patch_w / 2.0\n        trans_y = -patch_y + patch_h / 2.0\n\n        map_mask = np.zeros(canvas_size, np.uint8)\n\n        for polygon in layer_geom:\n            new_polygon = polygon.intersection(patch)\n            if not new_polygon.is_empty:\n                new_polygon = affinity.affine_transform(new_polygon,\n                                                        [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])\n                new_polygon = affinity.scale(new_polygon, xfact=scale_width, yfact=scale_height, origin=(0, 0))\n\n                if new_polygon.geom_type == 'Polygon':\n                    new_polygon = MultiPolygon([new_polygon])\n\n                # if new_polygon.area < 1000:\n      
          #     continue\n\n                if not isinstance(new_polygon, MultiPolygon):\n                    print(new_polygon)\n                    \n                    continue\n\n                map_mask = self.mask_for_polygons(new_polygon, map_mask)\n\n        return map_mask\n\n    def _line_geom_to_mask(self,\n                           layer_geom: List[LineString],\n                           local_box: Tuple[float, float, float, float],\n                           layer_name: str,\n                           canvas_size: Tuple[int, int]) -> Optional[np.ndarray]:\n        \"\"\"\n        Convert line inside patch to binary mask and return the map patch.\n        :param layer_geom: list of LineStrings for each map layer\n        :param local_box: The local patch box defined as (x_center, y_center, height, width), where typically\n            x_center = y_center = 0.\n        :param layer_name: name of map layer to be converted to binary map mask patch.\n        :param canvas_size: Size of the output mask (h, w).\n        :return: Binary map mask patch in a canvas size.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a line layer\".format(layer_name))\n\n        patch_x, patch_y, patch_h, patch_w = local_box\n\n        patch = self.get_patch_coord(local_box)\n\n        canvas_h = canvas_size[0]\n        canvas_w = canvas_size[1]\n        scale_height = canvas_h/patch_h\n        scale_width = canvas_w/patch_w\n\n        trans_x = -patch_x + patch_w / 2.0\n        trans_y = -patch_y + patch_h / 2.0\n\n        map_mask = np.zeros(canvas_size, np.uint8)\n\n        if layer_name == 'traffic_light':\n            return None\n\n        for line in layer_geom:\n            new_line = line.intersection(patch)\n            if not new_line.is_empty:\n                new_line = affinity.affine_transform(new_line,\n                                                     [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])\n                new_line = affinity.scale(new_line, xfact=scale_width, yfact=scale_height, origin=(0, 0))\n\n                map_mask = self.mask_for_lines(new_line, map_mask)\n        return map_mask\n\n    def _get_layer_polygon(self,\n                           patch_box: Tuple[float, float, float, float],\n                           patch_angle: float,\n                           layer_name: str) -> List[Polygon]:\n        \"\"\"\n         Retrieve the polygons of a particular layer within the specified patch.\n         :param patch_box: Patch box defined as [x_center, y_center, height, width].\n         :param patch_angle: Patch orientation in degrees.\n         :param layer_name: name of map layer to be extracted.\n         :return: List of Polygon in a patch box.\n         \"\"\"\n        if layer_name not in self.map_api.non_geometric_polygon_layers:\n            raise ValueError('{} is not a polygonal layer'.format(layer_name))\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.get_patch_coord(patch_box, patch_angle)\n\n        records = getattr(self.map_api, layer_name)\n\n        polygon_list = []\n        if layer_name == 'drivable_area':\n            for record in records:\n                polygons = [self.map_api.extract_polygon(polygon_token) for polygon_token in record['polygon_tokens']]\n\n                for polygon in polygons:\n                    new_polygon = polygon.intersection(patch)\n                    if not new_polygon.is_empty:\n                
        new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                        if new_polygon.geom_type == 'Polygon':\n                            new_polygon = MultiPolygon([new_polygon])\n                        polygon_list.append(new_polygon)\n\n        else:\n            for record in records:\n                polygon = self.map_api.extract_polygon(record['polygon_token'])\n\n                if polygon.is_valid:\n                    new_polygon = polygon.intersection(patch)\n                    if not new_polygon.is_empty:\n                        new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                        if new_polygon.geom_type == 'Polygon':\n                            new_polygon = MultiPolygon([new_polygon])\n                        polygon_list.append(new_polygon)\n\n        return polygon_list\n\n    def _get_layer_line(self,\n                        patch_box: Tuple[float, float, float, float],\n                        patch_angle: float,\n                        layer_name: str) -> Optional[List[LineString]]:\n        \"\"\"\n        Retrieve the lines of a particular layer within the specified patch.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n        :param layer_name: name of map layer to be converted to binary map mask patch.\n        :return: List of LineString in a patch box.\n        \"\"\"\n        if layer_name not in self.map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a line layer\".format(layer_name))\n\n        if layer_name == 'traffic_light':\n            return None\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.get_patch_coord(patch_box, patch_angle)\n\n        line_list = []\n        records = getattr(self.map_api, layer_name)\n        for record in records:\n            line = self.map_api.extract_line(record['line_token'])\n            if line.is_empty:  # Skip lines without nodes.\n                continue\n\n            new_line = line.intersection(patch)\n            if not new_line.is_empty:\n                new_line = affinity.rotate(new_line, -patch_angle,\n                                           origin=(patch_x, patch_y), use_radians=False)\n                new_line = affinity.affine_transform(new_line,\n                                                     [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                line_list.append(new_line)\n\n        return line_list\n\n    @staticmethod\n    def get_patch_coord(patch_box: Tuple[float, float, float, float],\n                        patch_angle: float = 0.0) -> Polygon:\n        \"\"\"\n        Convert patch_box to shapely Polygon coordinates.\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n        :return: Box Polygon for patch_box.\n   
     \"\"\"\n        patch_x, patch_y, patch_h, patch_w = patch_box\n\n        x_min = patch_x - patch_w / 2.0\n        y_min = patch_y - patch_h / 2.0\n        x_max = patch_x + patch_w / 2.0\n        y_max = patch_y + patch_h / 2.0\n\n        patch = box(x_min, y_min, x_max, y_max)\n        patch = affinity.rotate(patch, patch_angle, origin=(patch_x, patch_y), use_radians=False)\n\n        return patch\n\n    def _get_figsize(self, figsize: Union[None, float, Tuple[float, float]]) -> Tuple[float, float]:\n        \"\"\"\n        Utility function that scales the figure size by the map canvas size.\n        If figsize is:\n        - None      => Return default scale.\n        - Scalar    => Scale canvas size.\n        - Two-tuple => Use the specified figure size.\n        :param figsize: The input figure size.\n        :return: The output figure size.\n        \"\"\"\n        # Divide canvas size by arbitrary scalar to get into cm range.\n        canvas_size = np.array(self.map_api.canvas_edge)[::-1] / 200\n\n        if figsize is None:\n            return tuple(canvas_size)\n        elif type(figsize) in [int, float]:\n            return tuple(canvas_size * figsize)\n        elif type(figsize) == tuple and len(figsize) == 2:\n            return figsize\n        else:\n            raise Exception('Error: Invalid figsize: %s' % figsize)\n"
  },
  {
    "path": "mmdet3d/datasets/evals/metric_utils.py",
    "content": "import torch\nimport math\nimport numpy as np\nfrom typing import List, Dict, Tuple, Callable, Union\n\ndef min_ade(traj: torch.Tensor, traj_gt: torch.Tensor,\n            masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n    \"\"\"\n    Computes average displacement error for the best trajectory is a set,\n    with respect to ground truth\n    :param traj: predictions, shape [batch_size, num_modes, sequence_length, 2]\n    :param traj_gt: ground truth trajectory, shape\n    [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth, shape\n    [batch_size, sequence_length]\n    :return errs, inds: errors and indices for modes with min error, shape\n    [batch_size]\n    \"\"\"\n    num_modes = traj.shape[1]\n    traj_gt_rpt = traj_gt.unsqueeze(1).repeat(1, num_modes, 1, 1)\n    masks_rpt = masks.unsqueeze(1).repeat(1, num_modes, 1)\n    err = traj_gt_rpt - traj[:, :, :, 0:2]\n    err = torch.pow(err, exponent=2)\n    err = torch.sum(err, dim=3)\n    err = torch.pow(err, exponent=0.5)\n    err = torch.sum(err * (1 - masks_rpt), dim=2) / \\\n        torch.clip(torch.sum((1 - masks_rpt), dim=2), min=1)\n    err, inds = torch.min(err, dim=1)\n\n    return err, inds\n\n\ndef min_fde(traj: torch.Tensor, traj_gt: torch.Tensor,\n            masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n    \"\"\"\n    Computes final displacement error for the best trajectory is a set,\n    with respect to ground truth\n    :param traj: predictions, shape [batch_size, num_modes, sequence_length, 2]\n    :param traj_gt: ground truth trajectory, shape\n    [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth, shape\n    [batch_size, sequence_length]\n    :return errs, inds: errors and indices for modes with min error,\n    shape [batch_size]\n    \"\"\"\n    num_modes = traj.shape[1]\n    traj_gt_rpt = traj_gt.unsqueeze(1).repeat(1, num_modes, 1, 1)\n    lengths = torch.sum(1 - masks, dim=1).long()\n    inds = lengths.unsqueeze(1).unsqueeze(\n        2).unsqueeze(3).repeat(1, num_modes, 1, 2) - 1\n\n    traj_last = torch.gather(traj[..., :2], dim=2, index=inds).squeeze(2)\n    traj_gt_last = torch.gather(traj_gt_rpt, dim=2, index=inds).squeeze(2)\n\n    err = traj_gt_last - traj_last[..., 0:2]\n    err = torch.pow(err, exponent=2)\n    err = torch.sum(err, dim=2)\n    err = torch.pow(err, exponent=0.5)\n    err, inds = torch.min(err, dim=1)\n\n    return err, inds\n\n\ndef miss_rate(\n        traj: torch.Tensor,\n        traj_gt: torch.Tensor,\n        masks: torch.Tensor,\n        dist_thresh: float = 2) -> torch.Tensor:\n    \"\"\"\n    Computes miss rate for mini batch of trajectories,\n    with respect to ground truth and given distance threshold\n    :param traj: predictions, shape [batch_size, num_modes, sequence_length, 2]\n    :param traj_gt: ground truth trajectory,\n    shape [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth,\n    shape [batch_size, sequence_length]\n    :param dist_thresh: distance threshold for computing miss rate.\n    :return errs, inds: errors and indices for modes with min error,\n    shape [batch_size]\n    \"\"\"\n    num_modes = traj.shape[1]\n\n    traj_gt_rpt = traj_gt.unsqueeze(1).repeat(1, num_modes, 1, 1)\n    masks_rpt = masks.unsqueeze(1).repeat(1, num_modes, 1)\n    dist = traj_gt_rpt - traj[:, :, :, 0:2]\n    dist = torch.pow(dist, exponent=2)\n    dist = torch.sum(dist, dim=3)\n    dist = torch.pow(dist, exponent=0.5)\n    
dist[masks_rpt.bool()] = -math.inf\n    dist, _ = torch.max(dist, dim=2)\n    dist, _ = torch.min(dist, dim=1)\n    m_r = torch.sum(torch.as_tensor(dist > dist_thresh)) / len(dist)\n\n    return m_r\n\ndef traj_fde(gt_box, pred_box, final_step):\n    \"\"\"\n    Computes the minimum final displacement error over all predicted trajectory\n    modes at the given final step, clipped to the ground-truth trajectory length.\n    :param gt_box: ground truth box with a traj attribute of shape [num_steps, 2]\n    :param pred_box: predicted box with a traj attribute of shape [num_modes, num_steps, 2]\n    :param final_step: 1-based index of the step at which the error is measured\n    :return: minimum final displacement error over modes, or np.inf if the\n    ground truth trajectory is empty\n    \"\"\"\n    if gt_box.traj.shape[0] <= 0:\n        return np.inf\n    final_step = min(gt_box.traj.shape[0], final_step)\n    gt_final = gt_box.traj[None, final_step-1]\n    pred_final = np.array(pred_box.traj)[:, final_step-1, :]\n    err = np.sqrt(np.sum(np.square(gt_final - pred_final), axis=-1))\n    return np.min(err)"
  },
  {
    "path": "mmdet3d/datasets/evals/nuscenes_eval_motion.py",
    "content": "import argparse\nimport copy\nimport json\nimport os\nimport time\nfrom typing import Tuple, Dict, Any\nimport numpy as np\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.config import config_factory\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.detection.data_classes import DetectionConfig\nfrom nuscenes.eval.detection.evaluate import NuScenesEval\nfrom pyquaternion import Quaternion\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.utils.data_classes import Box\nfrom nuscenes.eval.common.loaders import add_center_dist, filter_eval_boxes\nimport tqdm\nfrom nuscenes.utils.geometry_utils import view_points, BoxVisibility\nimport pycocotools.mask as mask_util\nimport argparse\nimport json\nimport os\nimport random\nimport time\nfrom typing import Tuple, Dict, Any\nimport mmcv\nimport numpy as np\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.config import config_factory\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.common.loaders import add_center_dist, filter_eval_boxes\nfrom nuscenes.eval.detection.algo import calc_ap, calc_tp\nfrom nuscenes.eval.detection.constants import TP_METRICS\nfrom nuscenes.eval.detection.data_classes import DetectionConfig, DetectionMetrics, DetectionBox, \\\n    DetectionMetricDataList\nfrom nuscenes.eval.detection.render import summary_plot, class_pr_curve, dist_pr_curve, visualize_sample\nfrom nuscenes.eval.common.utils import quaternion_yaw, Quaternion\nfrom mmdet3d.core.bbox.iou_calculators import BboxOverlaps3D\nfrom IPython import embed\nimport json\nfrom typing import Any\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.common.render import setup_axis\nfrom nuscenes.eval.common.utils import boxes_to_sensor\nfrom nuscenes.eval.detection.constants import TP_METRICS, DETECTION_NAMES, DETECTION_COLORS, TP_METRICS_UNITS, \\\n    PRETTY_DETECTION_NAMES, PRETTY_TP_METRICS\nfrom nuscenes.eval.detection.data_classes import DetectionMetrics, DetectionMetricData, DetectionMetricDataList\nfrom nuscenes.utils.data_classes import LidarPointCloud\nfrom nuscenes.utils.geometry_utils import view_points\nfrom .eval_utils import load_prediction, load_gt, accumulate, accumulate_motion, \\\n    DetectionMotionBox, DetectionMotionBox_modified, DetectionMotionMetricData, \\\n    DetectionMotionMetrics, DetectionMotionMetricDataList\nfrom .metric_utils import traj_fde\nfrom prettytable import PrettyTable\n\nTP_METRICS = [\n    'trans_err',\n    'scale_err',\n    'orient_err',\n    'vel_err',\n    'attr_err',\n    'min_ade_err',\n    'min_fde_err',\n    'miss_rate_err']\nTP_TRAJ_METRICS = ['min_ade_err', 'min_fde_err', 'miss_rate_err']\nAxis = Any\n\n\ndef class_tp_curve(md_list: DetectionMetricDataList,\n                   metrics: DetectionMetrics,\n                   detection_name: str,\n                   min_recall: float,\n                   dist_th_tp: float,\n                   savepath: str = None,\n                   ax: Axis = None) -> None:\n    \"\"\"\n    Plot the true positive curve for the specified class.\n    :param md_list: DetectionMetricDataList instance.\n    :param metrics: DetectionMetrics instance.\n    :param detection_name:\n    :param min_recall: Minimum recall value.\n    :param dist_th_tp: The distance threshold used to determine matches.\n    :param savepath: If given, saves 
the the rendering here instead of displaying.\n    :param ax: Axes onto which to render.\n    \"\"\"\n    # Get metric data for given detection class with tp distance threshold.\n\n    md = md_list[(detection_name, dist_th_tp)]\n    min_recall_ind = round(100 * min_recall)\n    if min_recall_ind <= md.max_recall_ind:\n        # For traffic_cone and barrier only a subset of the metrics are\n        # plotted.\n        rel_metrics = [\n            m for m in TP_METRICS if not np.isnan(\n                metrics.get_label_tp(\n                    detection_name, m))]\n        ylimit = max([max(getattr(md, metric)[min_recall_ind:md.max_recall_ind + 1])\n                     for metric in rel_metrics]) * 1.1\n    else:\n        ylimit = 1.0\n\n    # Prepare axis.\n    if ax is None:\n        ax = setup_axis(\n            title=PRETTY_DETECTION_NAMES[detection_name],\n            xlabel='Recall',\n            ylabel='Error',\n            xlim=1,\n            min_recall=min_recall)\n    ax.set_ylim(0, ylimit)\n\n    # Plot the recall vs. error curve for each tp metric.\n    for metric in TP_METRICS:\n        tp = metrics.get_label_tp(detection_name, metric)\n\n        # Plot only if we have valid data.\n        if tp is not np.nan and min_recall_ind <= md.max_recall_ind:\n            recall, error = md.recall[:md.max_recall_ind +\n                                      1], getattr(md, metric)[:md.max_recall_ind + 1]\n        else:\n            recall, error = [], []\n\n        # Change legend based on tp value\n        if tp is np.nan:\n            label = '{}: n/a'.format(PRETTY_TP_METRICS[metric])\n        elif min_recall_ind > md.max_recall_ind:\n            label = '{}: nan'.format(PRETTY_TP_METRICS[metric])\n        else:\n            label = '{}: {:.2f} ({})'.format(\n                PRETTY_TP_METRICS[metric], tp, TP_METRICS_UNITS[metric])\n        if metric == 'trans_err':\n            label += f' ({md.max_recall_ind})'  # add recall\n            print(f'Recall: {detection_name}: {md.max_recall_ind/100}')\n        ax.plot(recall, error, label=label)\n    ax.axvline(x=md.max_recall, linestyle='-.', color=(0, 0, 0, 0.3))\n    ax.legend(loc='best')\n\n    if savepath is not None:\n        plt.savefig(savepath)\n        plt.close()\n\n\ndef center_in_image(box,\n                    intrinsic: np.ndarray,\n                    imsize: Tuple[int,\n                                  int],\n                    vis_level: int = BoxVisibility.ANY) -> bool:\n    \"\"\"\n    Check if a box is visible inside an image without accounting for occlusions.\n    :param box: The box to be checked.\n    :param intrinsic: <float: 3, 3>. 
Intrinsic camera matrix.\n    :param imsize: (width, height).\n    :param vis_level: One of the enumerations of <BoxVisibility>.\n    :return True if visibility condition is satisfied.\n    \"\"\"\n\n    center_3d = box.center.reshape(3, 1)\n    center_img = view_points(center_3d, intrinsic, normalize=True)[:2, :]\n\n    visible = np.logical_and(\n        center_img[0, :] > 0, center_img[0, :] < imsize[0])\n    visible = np.logical_and(visible, center_img[1, :] < imsize[1])\n    visible = np.logical_and(visible, center_img[1, :] > 0)\n    visible = np.logical_and(visible, center_3d[2, :] > 1)\n\n    # True if a corner is at least 0.1 meter in front of the camera.\n    in_front = center_3d[2, :] > 0.1\n\n    if vis_level == BoxVisibility.ALL:\n        return all(visible) and all(in_front)\n    elif vis_level == BoxVisibility.ANY:\n        return any(visible) and all(in_front)\n    elif vis_level == BoxVisibility.NONE:\n        return True\n    else:\n        raise ValueError(\"vis_level: {} not valid\".format(vis_level))\n\n\ndef exist_corners_in_image_but_not_all(box,\n                                       intrinsic: np.ndarray,\n                                       imsize: Tuple[int,\n                                                     int],\n                                       vis_level: int = BoxVisibility.ANY) -> bool:\n    \"\"\"\n    Check if a box is visible in images but not all corners in image .\n    :param box: The box to be checked.\n    :param intrinsic: <float: 3, 3>. Intrinsic camera matrix.\n    :param imsize: (width, height).\n    :param vis_level: One of the enumerations of <BoxVisibility>.\n    :return True if visibility condition is satisfied.\n    \"\"\"\n\n    corners_3d = box.corners()\n    corners_img = view_points(corners_3d, intrinsic, normalize=True)[:2, :]\n\n    visible = np.logical_and(\n        corners_img[0, :] > 0, corners_img[0, :] < imsize[0])\n    visible = np.logical_and(visible, corners_img[1, :] < imsize[1])\n    visible = np.logical_and(visible, corners_img[1, :] > 0)\n    visible = np.logical_and(visible, corners_3d[2, :] > 1)\n\n    # True if a corner is at least 0.1 meter in front of the camera.\n    in_front = corners_3d[2, :] > 0.1\n\n    if any(visible) and not all(visible) and all(in_front):\n        return True\n    else:\n        return False\n\n\ndef filter_eval_boxes_by_id(nusc: NuScenes,\n                            eval_boxes: EvalBoxes,\n                            id=None,\n                            verbose: bool = False) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. 
Keeps only boxes whose annotation token is in the given id set.\n    :param nusc: An instance of the NuScenes class.\n    :param eval_boxes: An instance of the EvalBoxes class.\n    :param id: the set of annotation tokens used to keep boxes.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            if box.token in id:\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n        print(\"=> After anns based filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\n\ndef filter_eval_boxes_by_visibility(\n        ori_eval_boxes: EvalBoxes,\n        visibility=None,\n        verbose: bool = False) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. Keeps only boxes with the given visibility level.\n    :param ori_eval_boxes: An instance of the EvalBoxes class.\n    :param visibility: the visibility token used to keep boxes.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    eval_boxes = copy.deepcopy(ori_eval_boxes)\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            if box.visibility == visibility:\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n        print(\"=> After visibility based filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\n\ndef filter_by_sample_token(\n        ori_eval_boxes,\n        valid_sample_tokens=[],\n        verbose=False):\n    eval_boxes = copy.deepcopy(ori_eval_boxes)\n    for sample_token in eval_boxes.sample_tokens:\n        if sample_token not in valid_sample_tokens:\n            eval_boxes.boxes.pop(sample_token)\n    return eval_boxes\n\n\ndef filter_eval_boxes_by_overlap(nusc: NuScenes,\n                                 eval_boxes: EvalBoxes,\n                                 verbose: bool = False) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. 
basedon overlap .\n    :param nusc: An instance of the NuScenes class.\n    :param eval_boxes: An instance of the EvalBoxes class.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    cams = ['CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_BACK_RIGHT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_FRONT_LEFT']\n\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        sample_record = nusc.get('sample', sample_token)\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            count = 0\n            for cam in cams:\n                '''\n                copy-paste form nuscens\n                '''\n                sample_data_token = sample_record['data'][cam]\n                sd_record = nusc.get('sample_data', sample_data_token)\n                cs_record = nusc.get(\n                    'calibrated_sensor',\n                    sd_record['calibrated_sensor_token'])\n                sensor_record = nusc.get('sensor', cs_record['sensor_token'])\n                pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])\n                cam_intrinsic = np.array(cs_record['camera_intrinsic'])\n                imsize = (sd_record['width'], sd_record['height'])\n                new_box = Box(\n                    box.translation,\n                    box.size,\n                    Quaternion(\n                        box.rotation),\n                    name=box.detection_name,\n                    token='')\n\n                # Move box to ego vehicle coord system.\n                new_box.translate(-np.array(pose_record['translation']))\n                new_box.rotate(Quaternion(pose_record['rotation']).inverse)\n\n                #  Move box to sensor coord system.\n                new_box.translate(-np.array(cs_record['translation']))\n                new_box.rotate(Quaternion(cs_record['rotation']).inverse)\n\n                if center_in_image(\n                        new_box,\n                        cam_intrinsic,\n                        imsize,\n                        vis_level=BoxVisibility.ANY):\n                    count += 1\n                # if exist_corners_in_image_but_not_all(new_box, cam_intrinsic, imsize, vis_level=BoxVisibility.ANY):\n                #    count += 1\n\n            if count > 1:\n                with open('center_overlap.txt', 'a') as f:\n                    try:\n                        f.write(box.token + '\\n')\n                    except BaseException:\n                        pass\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n    verbose = True\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n        print(\"=> After anns based filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\n\nclass MotionEval(NuScenesEval):\n    \"\"\"\n    Dummy class for backward-compatibility. 
Same as DetectionEval.\n    \"\"\"\n\n    def __init__(self,\n                 nusc: NuScenes,\n                 config: DetectionConfig,\n                 result_path: str,\n                 eval_set: str,\n                 output_dir: str = None,\n                 verbose: bool = True,\n                 eval_mask=False,\n                 data_infos=None,\n                 ann_file=None,\n                 category_convert_type='motion_category',\n                 ):\n        \"\"\"\n        Initialize a DetectionEval object.\n        :param nusc: A NuScenes object.\n        :param config: A DetectionConfig object.\n        :param result_path: Path of the nuScenes JSON result file.\n        :param eval_set: The dataset split to evaluate on, e.g. train, val or test.\n        :param output_dir: Folder to save plots and results to.\n        :param verbose: Whether to print to stdout.\n        \"\"\"\n\n        self.nusc = nusc\n        self.result_path = result_path\n        self.eval_set = eval_set\n        self.output_dir = output_dir\n        self.verbose = verbose\n        self.cfg = config\n        self.eval_mask = eval_mask\n        self.data_infos = data_infos\n        # Check result file exists.\n        assert os.path.exists(\n            result_path), 'Error: The result file does not exist!'\n\n        # Make dirs.\n        self.plot_dir = os.path.join(self.output_dir, 'plots')\n        if not os.path.isdir(self.output_dir):\n            os.makedirs(self.output_dir)\n        if not os.path.isdir(self.plot_dir):\n            os.makedirs(self.plot_dir)\n\n        # Load data.\n        if verbose:\n            print('Initializing nuScenes detection evaluation')\n        self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionMotionBox,\n                                                     verbose=verbose, category_convert_type=category_convert_type)\n        # data = mmcv.load(ann_file, file_format='pkl')\n        # data_infos = {}\n\n        # for info in data['infos']:\n        #     data_infos[info['token']] = info\n\n        self.gt_boxes = load_gt(\n            self.nusc,\n            self.eval_set,\n            DetectionMotionBox_modified,\n            verbose=verbose,\n            category_convert_type=category_convert_type)\n\n        assert set(self.pred_boxes.sample_tokens) == set(self.gt_boxes.sample_tokens), \\\n            \"Samples in split doesn't match samples in predictions.\"\n\n        # Add center distances.\n        self.pred_boxes = add_center_dist(nusc, self.pred_boxes)\n        self.gt_boxes = add_center_dist(nusc, self.gt_boxes)\n\n        # Filter boxes (distance, points per box, etc.).\n\n        if verbose:\n            print('Filtering predictions')\n        self.pred_boxes = filter_eval_boxes(\n            nusc, self.pred_boxes, self.cfg.class_range, verbose=verbose)\n        if verbose:\n            print('Filtering ground truth annotations')\n        self.gt_boxes = filter_eval_boxes(\n            nusc, self.gt_boxes, self.cfg.class_range, verbose=verbose)\n\n        # if self.overlap_test:\n        #     self.pred_boxes = filter_eval_boxes_by_overlap(\n        #         self.nusc, self.pred_boxes)\n\n        #     self.gt_boxes = filter_eval_boxes_by_overlap(\n        #         self.nusc, self.gt_boxes, verbose=True)\n\n        self.all_gt = copy.deepcopy(self.gt_boxes)\n        self.all_preds = copy.deepcopy(self.pred_boxes)\n        self.sample_tokens = self.gt_boxes.sample_tokens\n\n        
self.index_map = {}\n        for scene in nusc.scene:\n            first_sample_token = scene['first_sample_token']\n            sample = nusc.get('sample', first_sample_token)\n            self.index_map[first_sample_token] = 1\n            index = 2\n            while sample['next'] != '':\n                sample = nusc.get('sample', sample['next'])\n                self.index_map[sample['token']] = index\n                index += 1\n\n    def update_gt(self, type_='vis', visibility='1', index=1):\n        if type_ == 'vis':\n            self.visibility_test = True\n            if self.visibility_test:\n                '''[{'description': 'visibility of whole object is between 0 and 40%',\n                'token': '1',\n                'level': 'v0-40'},\n                {'description': 'visibility of whole object is between 40 and 60%',\n                'token': '2',\n                'level': 'v40-60'},\n                {'description': 'visibility of whole object is between 60 and 80%',\n                'token': '3',\n                'level': 'v60-80'},\n                {'description': 'visibility of whole object is between 80 and 100%',\n                'token': '4',\n                'level': 'v80-100'}]'''\n\n                self.gt_boxes = filter_eval_boxes_by_visibility(\n                    self.all_gt, visibility, verbose=True)\n\n        elif type_ == 'ord':\n\n            valid_tokens = [\n                key for (\n                    key,\n                    value) in self.index_map.items() if value == index]\n            # from IPython import embed\n            # embed()\n            self.gt_boxes = filter_by_sample_token(self.all_gt, valid_tokens)\n            self.pred_boxes = filter_by_sample_token(\n                self.all_preds, valid_tokens)\n        self.sample_tokens = self.gt_boxes.sample_tokens\n\n    def evaluate(self) -> Tuple[DetectionMotionMetrics,\n                                DetectionMotionMetricDataList]:\n        \"\"\"\n        Performs the actual evaluation.\n        :return: A tuple of high-level and the raw metric data.\n        \"\"\"\n        start_time = time.time()\n\n        # -----------------------------------\n        # Step 1: Accumulate metric data for all classes and distance thresholds.\n        # -----------------------------------\n        if self.verbose:\n            print('Accumulating metric data...')\n        metric_data_list = DetectionMotionMetricDataList()\n\n        # print(self.cfg.dist_fcn_callable, self.cfg.dist_ths)\n        # self.cfg.dist_ths = [0.3]\n        # self.cfg.dist_fcn_callable\n        for class_name in self.cfg.class_names:\n            for dist_th in self.cfg.dist_ths:\n                md, _, _, _ = accumulate(\n                    self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn_callable, dist_th)\n                metric_data_list.set(class_name, dist_th, md)\n        # from IPython import embed\n        # embed()\n        # exit()\n        # -----------------------------------\n        # Step 2: Calculate metrics from the data.\n        # -----------------------------------\n        if self.verbose:\n            print('Calculating metrics...')\n        metrics = DetectionMotionMetrics(self.cfg)\n\n        traj_metrics = {}\n        for class_name in self.cfg.class_names:\n            # Compute APs.\n            for dist_th in self.cfg.dist_ths:\n                metric_data = metric_data_list[(class_name, dist_th)]\n                ap = calc_ap(\n                    metric_data,\n              
      self.cfg.min_recall,\n                    self.cfg.min_precision)\n                metrics.add_label_ap(class_name, dist_th, ap)\n            # Compute TP metrics.\n            for metric_name in TP_METRICS:\n                metric_data = metric_data_list[(\n                    class_name, self.cfg.dist_th_tp)]\n                if class_name in ['traffic_cone'] and metric_name in [\n                        'attr_err', 'vel_err', 'orient_err']:\n                    tp = np.nan\n                elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:\n                    tp = np.nan\n                else:\n                    tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)\n                    if metric_name in TP_TRAJ_METRICS:\n                        if class_name not in traj_metrics:\n                            traj_metrics[class_name] = {}\n                        traj_metrics[class_name][metric_name] = tp\n                metrics.add_label_tp(class_name, metric_name, tp)\n        print_traj_metrics(traj_metrics)\n\n        # Compute evaluation time.\n        metrics.add_runtime(time.time() - start_time)\n\n        return metrics, metric_data_list\n\n    def evaluate_motion(\n            self) -> Tuple[DetectionMotionMetrics, DetectionMotionMetricDataList]:\n        \"\"\"\n        Performs the actual evaluation.\n        :return: A tuple of high-level and the raw metric data.\n        \"\"\"\n        start_time = time.time()\n\n        self.cfg.dist_ths = [1.0]\n        self.cfg.dist_th_tp = 1.0  # center dist for detection\n        traj_dist_th = 2.0  # FDE for traj\n\n        # -----------------------------------\n        # Step 1: Accumulate metric data for all classes and distance thresholds.\n        # -----------------------------------\n        if self.verbose:\n            print('Accumulating metric data...')\n        metric_data_list = DetectionMotionMetricDataList()\n\n        for class_name in self.cfg.class_names:\n            for dist_th in self.cfg.dist_ths:\n                md, _, _, _ = accumulate_motion(\n                    self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn_callable, traj_fde, dist_th, traj_dist_th)\n                metric_data_list.set(class_name, dist_th, md)\n\n        # -----------------------------------\n        # Step 2: Calculate metrics from the data.\n        # -----------------------------------\n        if self.verbose:\n            print('Calculating metrics...')\n        metrics = DetectionMotionMetrics(self.cfg)\n\n        traj_metrics = {}\n        for class_name in self.cfg.class_names:\n            # Compute APs.\n            for dist_th in self.cfg.dist_ths:\n                metric_data = metric_data_list[(class_name, dist_th)]\n                ap = calc_ap(\n                    metric_data,\n                    self.cfg.min_recall,\n                    self.cfg.min_precision)\n                metrics.add_label_ap(class_name, dist_th, ap)\n            # Compute TP metrics.\n            for metric_name in TP_METRICS:\n                metric_data = metric_data_list[(\n                    class_name, self.cfg.dist_th_tp)]\n                if class_name in ['traffic_cone'] and metric_name in [\n                        'attr_err', 'vel_err', 'orient_err']:\n                    tp = np.nan\n                elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:\n                    tp = np.nan\n                else:\n                    tp = calc_tp(metric_data, 
self.cfg.min_recall, metric_name)\n                    if metric_name in TP_TRAJ_METRICS:\n                        if class_name not in traj_metrics:\n                            traj_metrics[class_name] = {}\n                        traj_metrics[class_name][metric_name] = tp\n                metrics.add_label_tp(class_name, metric_name, tp)\n        print_traj_metrics(traj_metrics)\n\n        # Compute evaluation time.\n        metrics.add_runtime(time.time() - start_time)\n\n        return metrics, metric_data_list\n\n    def evaluate_epa(\n            self) -> Tuple[DetectionMotionMetrics, DetectionMotionMetricDataList]:\n        \"\"\"\n        Performs the actual evaluation.\n        :return: A tuple of high-level and the raw metric data.\n        \"\"\"\n        start_time = time.time()\n\n        self.cfg.dist_ths = [2.0]\n        self.cfg.dist_th_tp = 2.0  # center dist for detection\n        traj_dist_th = 2.0  # FDE for traj\n\n        # -----------------------------------\n        # Step 1: Accumulate metric data for all classes and distance thresholds.\n        # -----------------------------------\n        if self.verbose:\n            print('Accumulating metric data...')\n        metric_data_list = DetectionMotionMetricDataList()\n\n        for class_name in self.cfg.class_names:\n            for dist_th in self.cfg.dist_ths:\n                md, N_det_tp, N_det_fp, N_det_gt = accumulate(\n                    self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn_callable, dist_th)\n                md, N_det_traj_tp, N_det_traj_fp, N_det_traj_gt = accumulate_motion(\n                    self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn_callable, traj_fde, dist_th, traj_dist_th)\n                metric_data_list.set(class_name, dist_th, md)\n                EPA = (N_det_traj_tp - 0.5 * N_det_fp) / (N_det_gt + 1e-5)\n                print(N_det_traj_tp, N_det_fp, N_det_gt)\n                print('EPA ', class_name, EPA)\n\n        # -----------------------------------\n        # Step 2: Calculate metrics from the data.\n        # -----------------------------------\n        if self.verbose:\n            print('Calculating metrics...')\n        metrics = DetectionMotionMetrics(self.cfg)\n\n        traj_metrics = {}\n        for class_name in self.cfg.class_names:\n            # Compute APs.\n            for dist_th in self.cfg.dist_ths:\n                metric_data = metric_data_list[(class_name, dist_th)]\n                ap = calc_ap(\n                    metric_data,\n                    self.cfg.min_recall,\n                    self.cfg.min_precision)\n                metrics.add_label_ap(class_name, dist_th, ap)\n            # Compute TP metrics.\n            for metric_name in TP_METRICS:\n                metric_data = metric_data_list[(\n                    class_name, self.cfg.dist_th_tp)]\n                if class_name in ['traffic_cone'] and metric_name in [\n                        'attr_err', 'vel_err', 'orient_err']:\n                    tp = np.nan\n                elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:\n                    tp = np.nan\n                else:\n                    tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)\n                    if metric_name in TP_TRAJ_METRICS:\n                        if class_name not in traj_metrics:\n                            traj_metrics[class_name] = {}\n                        traj_metrics[class_name][metric_name] = tp\n                
metrics.add_label_tp(class_name, metric_name, tp)\n        print_traj_metrics(traj_metrics)\n\n        # Compute evaluation time.\n        metrics.add_runtime(time.time() - start_time)\n\n        return metrics, metric_data_list\n\n    def main(self,\n             plot_examples: int = 0,\n             render_curves: bool = True,\n             eval_mode: str = 'standard') -> Dict[str, Any]:\n        \"\"\"\n        Main function that loads the evaluation code, visualizes samples, runs the evaluation and renders stat plots.\n        :param plot_examples: How many example visualizations to write to disk.\n        :param render_curves: Whether to render PR and TP curves to disk.\n        :return: A dict that stores the high-level metrics and meta data.\n        \"\"\"\n        if plot_examples > 0:\n            # Select a random but fixed subset to plot.\n            random.seed(42)\n            sample_tokens = list(self.sample_tokens)\n            random.shuffle(sample_tokens)\n            sample_tokens = sample_tokens[:plot_examples]\n\n            # Visualize samples.\n            example_dir = os.path.join(self.output_dir, 'examples')\n            if not os.path.isdir(example_dir):\n                os.mkdir(example_dir)\n            for sample_token in sample_tokens:\n                visualize_sample(self.nusc,\n                                 sample_token,\n                                 self.gt_boxes if self.eval_set != 'test' else EvalBoxes(),\n                                 # Don't render test GT.\n                                 self.pred_boxes,\n                                 eval_range=max(self.cfg.class_range.values()),\n                                 savepath=os.path.join(example_dir, '{}.png'.format(sample_token)))\n\n        # Run evaluation.\n        if eval_mode == 'motion_map':\n            metrics, metric_data_list = self.evaluate_motion()\n        elif eval_mode == 'standard':\n            metrics, metric_data_list = self.evaluate()\n        elif eval_mode == 'epa':\n            metrics, metric_data_list = self.evaluate_epa()\n        else:\n            raise NotImplementedError\n        # Render PR and TP curves.\n        if render_curves:\n            self.render(metrics, metric_data_list)\n\n        # Dump the metric data, meta and metrics to disk.\n        if self.verbose:\n            print('Saving metrics to: %s' % self.output_dir)\n        metrics_summary = metrics.serialize()\n        metrics_summary['meta'] = self.meta.copy()\n        with open(os.path.join(self.output_dir, 'metrics_summary.json'), 'w') as f:\n            json.dump(metrics_summary, f, indent=2)\n        with open(os.path.join(self.output_dir, 'metrics_details.json'), 'w') as f:\n            json.dump(metric_data_list.serialize(), f, indent=2)\n\n        # Print high-level metrics.\n        print('mAP: %.4f' % (metrics_summary['mean_ap']))\n        err_name_mapping = {\n            'trans_err': 'mATE',\n            'scale_err': 'mASE',\n            'orient_err': 'mAOE',\n            'vel_err': 'mAVE',\n            'attr_err': 'mAAE'\n        }\n        for tp_name, tp_val in metrics_summary['tp_errors'].items():\n            print('%s: %.4f' % (err_name_mapping[tp_name], tp_val))\n        print('NDS: %.4f' % (metrics_summary['nd_score']))\n        print('Eval time: %.1fs' % metrics_summary['eval_time'])\n\n        # Print per-class metrics.\n        print()\n        print('Per-class results:')\n        print('Object Class\\tAP\\tATE\\tASE\\tAOE\\tAVE\\tAAE')\n        class_aps = 
metrics_summary['mean_dist_aps']\n        class_tps = metrics_summary['label_tp_errors']\n        for class_name in class_aps.keys():\n            print('%s\\t%.3f\\t%.3f\\t%.3f\\t%.3f\\t%.3f\\t%.3f'\n                  % (class_name, class_aps[class_name],\n                     class_tps[class_name]['trans_err'],\n                     class_tps[class_name]['scale_err'],\n                     class_tps[class_name]['orient_err'],\n                     class_tps[class_name]['vel_err'],\n                     class_tps[class_name]['attr_err']))\n\n        return metrics_summary\n\n    def render(self, metrics: DetectionMetrics,\n               md_list: DetectionMetricDataList) -> None:\n        \"\"\"\n        Renders various PR and TP curves.\n        :param metrics: DetectionMetrics instance.\n        :param md_list: DetectionMetricDataList instance.\n        \"\"\"\n        if self.verbose:\n            print('Rendering PR and TP curves')\n\n        def savepath(name):\n            return os.path.join(self.plot_dir, name + '.pdf')\n\n        summary_plot(\n            md_list,\n            metrics,\n            min_precision=self.cfg.min_precision,\n            min_recall=self.cfg.min_recall,\n            dist_th_tp=self.cfg.dist_th_tp,\n            savepath=savepath('summary'))\n\n        for detection_name in self.cfg.class_names:\n            class_pr_curve(\n                md_list,\n                metrics,\n                detection_name,\n                self.cfg.min_precision,\n                self.cfg.min_recall,\n                savepath=savepath(\n                    detection_name +\n                    '_pr'))\n\n            class_tp_curve(\n                md_list,\n                metrics,\n                detection_name,\n                self.cfg.min_recall,\n                self.cfg.dist_th_tp,\n                savepath=savepath(\n                    detection_name +\n                    '_tp'))\n\n        for dist_th in self.cfg.dist_ths:\n            dist_pr_curve(\n                md_list,\n                metrics,\n                dist_th,\n                self.cfg.min_precision,\n                self.cfg.min_recall,\n                savepath=savepath(\n                    'dist_pr_' +\n                    str(dist_th)))\n\n\ndef print_traj_metrics(metrics):\n    class_names = metrics.keys()\n    x = PrettyTable()\n    x.field_names = [\"class names\"] + TP_TRAJ_METRICS\n    for class_name in metrics.keys():\n        row_data = [class_name]\n        for m in TP_TRAJ_METRICS:\n            row_data.append('%.4f' % metrics[class_name][m])\n        x.add_row(row_data)\n    print(x)\n\n\nif __name__ == \"__main__\":\n\n    # Settings.\n    parser = argparse.ArgumentParser(\n        description='Evaluate nuScenes detection results.',\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument(\n        'result_path',\n        type=str,\n        help='The submission as a JSON file.')\n    parser.add_argument(\n        '--output_dir',\n        type=str,\n        default='~/nuscenes-metrics',\n        help='Folder to store result metrics, graphs and example visualizations.')\n    parser.add_argument(\n        '--eval_set',\n        type=str,\n        default='val',\n        help='Which dataset split to evaluate on, train, val or test.')\n    parser.add_argument('--dataroot', type=str, default='data/nuscenes',\n                        help='Default nuScenes data directory.')\n    parser.add_argument(\n        '--version',\n        type=str,\n        
default='v1.0-mini',\n        help='Which version of the nuScenes dataset to evaluate on, e.g. v1.0-trainval.')\n    parser.add_argument(\n        '--config_path',\n        type=str,\n        default='',\n        help='Path to the configuration file.'\n        'If no path given, the CVPR 2019 configuration will be used.')\n    parser.add_argument(\n        '--plot_examples',\n        type=int,\n        default=0,\n        help='How many example visualizations to write to disk.')\n    parser.add_argument('--render_curves', type=int, default=1,\n                        help='Whether to render PR and TP curves to disk.')\n    parser.add_argument('--verbose', type=int, default=1,\n                        help='Whether to print to stdout.')\n    args = parser.parse_args()\n\n    result_path_ = os.path.expanduser(args.result_path)\n    output_dir_ = os.path.expanduser(args.output_dir)\n    eval_set_ = args.eval_set\n    dataroot_ = args.dataroot\n    version_ = args.version\n    config_path = args.config_path\n    plot_examples_ = args.plot_examples\n    render_curves_ = bool(args.render_curves)\n    verbose_ = bool(args.verbose)\n\n    if config_path == '':\n        cfg_ = config_factory('detection_cvpr_2019')\n    else:\n        with open(config_path, 'r') as _f:\n            cfg_ = DetectionConfig.deserialize(json.load(_f))\n\n    nusc_ = NuScenes(version=version_, verbose=verbose_, dataroot=dataroot_)\n    nusc_eval = MotionEval(\n        nusc_,\n        config=cfg_,\n        result_path=result_path_,\n        eval_set=eval_set_,\n        output_dir=output_dir_,\n        verbose=verbose_)\n    for vis in ['1', '2', '3', '4']:\n        nusc_eval.update_gt(type_='vis', visibility=vis)\n        print(f'================ {vis} ===============')\n        nusc_eval.main(\n            plot_examples=plot_examples_,\n            render_curves=render_curves_)\n"
  },
  {
    "path": "mmdet3d/datasets/evaluation/AP.py",
    "content": "import numpy as np\nfrom .distance import chamfer_distance, frechet_distance, chamfer_distance_batch\nfrom typing import List, Tuple, Union\nfrom numpy.typing import NDArray\nimport torch\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision. \n\n    Args:\n        recalls (ndarray): shape (num_dets, )\n        precisions (ndarray): shape (num_dets, )\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float: calculated average precision\n    \"\"\"\n\n    recalls = recalls[np.newaxis, :]\n    precisions = precisions[np.newaxis, :]\n\n    assert recalls.shape == precisions.shape and recalls.ndim == 2\n    num_scales = recalls.shape[0]\n    ap = 0.\n\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        \n        ind = np.where(mrec[0, 1:] != mrec[0, :-1])[0]\n        ap = np.sum(\n            (mrec[0, ind + 1] - mrec[0, ind]) * mpre[0, ind + 1])\n    \n    elif mode == '11points':\n        for thr in np.arange(0, 1 + 1e-3, 0.1):\n            precs = precisions[0, recalls[i, :] >= thr]\n            prec = precs.max() if precs.size > 0 else 0\n            ap += prec\n        ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    \n    return ap\n\ndef instance_match(pred_lines: NDArray, \n                   scores: NDArray, \n                   gt_lines: NDArray, \n                   thresholds: Union[Tuple, List], \n                   metric: str='chamfer') -> List:\n    \"\"\"Compute whether detected lines are true positive or false positive.\n\n    Args:\n        pred_lines (array): Detected lines of a sample, of shape (M, INTERP_NUM, 2 or 3).\n        scores (array): Confidence score of each line, of shape (M, ).\n        gt_lines (array): GT lines of a sample, of shape (N, INTERP_NUM, 2 or 3).\n        thresholds (list of tuple): List of thresholds.\n        metric (str): Distance function for lines matching. Default: 'chamfer'.\n\n    Returns:\n        list_of_tp_fp (list): tp-fp matching result at all thresholds\n    \"\"\"\n\n    if metric == 'chamfer':\n        distance_fn = chamfer_distance\n\n    elif metric == 'frechet':\n        distance_fn = frechet_distance\n    \n    else:\n        raise ValueError(f'unknown distance function {metric}')\n\n    num_preds = pred_lines.shape[0]\n    num_gts = gt_lines.shape[0]\n\n    # tp and fp\n    tp_fp_list = []\n    tp = np.zeros((num_preds), dtype=np.float32)\n    fp = np.zeros((num_preds), dtype=np.float32)\n\n    # if there is no gt lines in this sample, then all pred lines are false positives\n    if num_gts == 0:\n        fp[...] 
= 1\n        for thr in thresholds:\n            tp_fp_list.append((tp.copy(), fp.copy()))\n        return tp_fp_list\n    \n    if num_preds == 0:\n        for thr in thresholds:\n            tp_fp_list.append((tp.copy(), fp.copy()))\n        return tp_fp_list\n\n    assert pred_lines.shape[1] == gt_lines.shape[1], \\\n        \"sample points num should be the same\"\n\n    # distance matrix: M x N\n    matrix = np.zeros((num_preds, num_gts))\n\n    # for i in range(num_preds):\n    #     for j in range(num_gts):\n    #         matrix[i, j] = distance_fn(pred_lines[i], gt_lines[j])\n    \n    matrix = chamfer_distance_batch(pred_lines, gt_lines)\n    # for each det, the min distance with all gts\n    matrix_min = matrix.min(axis=1)\n\n    # for each det, which gt is the closest to it\n    matrix_argmin = matrix.argmin(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-scores)\n\n    # match under different thresholds\n    for thr in thresholds:\n        tp = np.zeros((num_preds), dtype=np.float32)\n        fp = np.zeros((num_preds), dtype=np.float32)\n\n        gt_covered = np.zeros(num_gts, dtype=bool)\n        for i in sort_inds:\n            if matrix_min[i] <= thr:\n                matched_gt = matrix_argmin[i]\n                if not gt_covered[matched_gt]:\n                    gt_covered[matched_gt] = True\n                    tp[i] = 1\n                else:\n                    fp[i] = 1\n            else:\n                fp[i] = 1\n        \n        tp_fp_list.append((tp, fp))\n\n    return tp_fp_list"
  },
  {
    "path": "mmdet3d/datasets/evaluation/__init__.py",
    "content": ""
  },
  {
    "path": "mmdet3d/datasets/evaluation/distance.py",
    "content": "from scipy.spatial import distance\nfrom numpy.typing import NDArray\nimport torch\n\ndef chamfer_distance(line1: NDArray, line2: NDArray) -> float:\n    ''' Calculate chamfer distance between two lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        line1 (array): coordinates of line1\n        line2 (array): coordinates of line2\n    \n    Returns:\n        distance (float): chamfer distance\n    '''\n    \n    dist_matrix = distance.cdist(line1, line2, 'euclidean')\n    dist12 = dist_matrix.min(-1).sum() / len(line1)\n    dist21 = dist_matrix.min(-2).sum() / len(line2)\n\n    return (dist12 + dist21) / 2\n\ndef frechet_distance(line1: NDArray, line2: NDArray) -> float:\n    ''' Calculate frechet distance between two lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        line1 (array): coordinates of line1\n        line2 (array): coordinates of line2\n    \n    Returns:\n        distance (float): frechet distance\n    '''\n    \n    raise NotImplementedError\n\ndef chamfer_distance_batch(pred_lines, gt_lines):\n    ''' Calculate chamfer distance between two group of lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        pred_lines (array or tensor): shape (m, num_pts, 2 or 3)\n        gt_lines (array or tensor): shape (n, num_pts, 2 or 3)\n    \n    Returns:\n        distance (array): chamfer distance\n    '''\n    _, num_pts, coord_dims = pred_lines.shape\n    \n    if not isinstance(pred_lines, torch.Tensor):\n        pred_lines = torch.tensor(pred_lines)\n    if not isinstance(gt_lines, torch.Tensor):\n        gt_lines = torch.tensor(gt_lines)\n    dist_mat = torch.cdist(pred_lines.view(-1, coord_dims), \n                    gt_lines.view(-1, coord_dims), p=2) \n    # (num_query*num_points, num_gt*num_points)\n    dist_mat = torch.stack(torch.split(dist_mat, num_pts)) \n    # (num_query, num_points, num_gt*num_points)\n    dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=-1)) \n    # (num_gt, num_q, num_pts, num_pts)\n\n    dist1 = dist_mat.min(-1)[0].sum(-1)\n    dist2 = dist_mat.min(-2)[0].sum(-1)\n\n    dist_matrix = (dist1 + dist2).transpose(0, 1) / (2 * num_pts)\n    \n    return dist_matrix.numpy()"
  },
  {
    "path": "mmdet3d/datasets/evaluation/raster_eval.py",
    "content": "import torch\nfrom mmdet3d.datasets import build_dataset, build_dataloader\nimport mmcv\nfrom functools import cached_property\nimport prettytable\nfrom numpy.typing import NDArray\nfrom typing import Dict, Optional\nfrom logging import Logger\nfrom mmcv import Config\nfrom copy import deepcopy\n\nN_WORKERS = 16\n\nclass RasterEvaluate(object):\n    \"\"\"Evaluator for rasterized map.\n\n    Args:\n        dataset_cfg (Config): dataset cfg for gt\n        n_workers (int): num workers to parallel\n    \"\"\"\n\n    def __init__(self, dataset_cfg: Config, n_workers: int=N_WORKERS):\n        self.dataset = build_dataset(dataset_cfg)\n        self.dataloader = build_dataloader(\n            self.dataset, samples_per_gpu=1, workers_per_gpu=n_workers, shuffle=False, dist=False)\n        self.cat2id = self.dataset.cat2id\n        self.id2cat = {v: k for k, v in self.cat2id.items()}\n        self.n_workers = n_workers\n\n    @cached_property\n    def gts(self) -> Dict[str, NDArray]:\n        print('collecting gts...')\n        gts = {}\n        for data in mmcv.track_iter_progress(self.dataloader):\n            token = deepcopy(data['img_metas'].data[0][0]['token'])\n            gt = deepcopy(data['semantic_mask'].data[0][0])\n            gts[token] = gt\n            del data # avoid dataloader memory crash\n        \n        return gts\n\n    def evaluate(self, \n                 result_path: str, \n                 logger: Optional[Logger]=None) -> Dict[str, float]:\n        ''' Do evaluation for a submission file and print evalution results to `logger` if specified.\n        The submission will be aligned by tokens before evaluation. \n        \n        Args:\n            result_path (str): path to submission file\n            logger (Logger): logger to print evaluation result, Default: None\n        \n        Returns:\n            result_dict (Dict): evaluation results. IoU by categories.\n        '''\n        \n        results = mmcv.load(result_path)\n        meta = results['meta']\n        results = results['results']\n\n        result_dict = {}\n\n        gts = []\n        preds = []\n        for token, gt in self.gts.items():\n            gts.append(gt)\n            if token in results:\n                pred = results[token]['semantic_mask']\n            else:\n                pred = torch.zeros((len(self.cat2id), \n                    self.canvas_size[1], self.canvas_size[0])).bool()\n            \n            preds.append(pred)\n        \n        preds = torch.stack(preds).bool()\n        gts = torch.stack(gts).bool()\n\n        # for every label\n        total = 0\n        for i in range(gts.shape[1]):\n            category = self.id2cat[i]\n            pred = preds[:, i]\n            gt = gts[:, i]\n            intersect = (pred & gt).sum().float().item()\n            union = (pred | gt).sum().float().item()\n            result_dict[category] = intersect / (union + 1e-7)\n            total += result_dict[category]\n        \n        mIoU = total / gts.shape[1]\n        result_dict['mIoU'] = mIoU\n        \n        categories = list(self.cat2id.keys())\n        table = prettytable.PrettyTable([' ', *categories, 'mean'])\n        table.add_row(['IoU', \n            *[round(result_dict[cat], 4) for cat in categories], \n            round(mIoU, 4)])\n        \n        if logger:\n            from mmcv.utils import print_log\n            print_log('\\n'+str(table), logger=logger)\n            print_log(f'mIoU = {mIoU:.4f}\\n', logger=logger)\n\n        return result_dict\n"
  },
  {
    "path": "mmdet3d/datasets/evaluation/vector_eval.py",
    "content": "from functools import partial\nimport numpy as np\nfrom multiprocessing import Pool\nfrom mmdet3d.datasets import build_dataset, build_dataloader\nimport mmcv\nfrom .AP import instance_match, average_precision\nimport prettytable\nfrom time import time\nfrom functools import cached_property\nfrom shapely.geometry import LineString\nfrom numpy.typing import NDArray\nfrom typing import Dict, List, Optional\nfrom logging import Logger\nfrom mmcv import Config\nfrom copy import deepcopy\nimport os\nfrom IPython import embed\n\nINTERP_NUM = 200 # number of points to interpolate during evaluation\nTHRESHOLDS = [0.5, 1.0, 1.5] # AP thresholds\nN_WORKERS = 16 # num workers to parallel\nSAMPLE_DIST = 0.15\n\nclass VectorEvaluate(object):\n    \"\"\"Evaluator for vectorized map.\n\n    Args:\n        dataset_cfg (Config): dataset cfg for gt\n        n_workers (int): num workers to parallel\n    \"\"\"\n\n    def __init__(self, dataset_cfg: Config, n_workers: int=N_WORKERS) -> None:\n        self.dataset = build_dataset(dataset_cfg)\n        self.cat2id = self.dataset.cat2id\n        self.id2cat = {v: k for k, v in self.cat2id.items()}\n        self.n_workers = n_workers\n        self.new_split = 'newsplit' in self.dataset.ann_file\n        self.roi_size = self.dataset.roi_size\n        if self.roi_size == (60, 30):\n            self.thresholds = [0.5, 1.0, 1.5]\n        elif self.roi_size == (100, 50):\n            self.thresholds = [1.0, 1.5, 2.0]\n        \n    @cached_property\n    def gts(self) -> Dict[str, Dict[int, List[NDArray]]]:\n        roi_size = self.dataset.roi_size\n        if 'av2' in self.dataset.ann_file:\n            dataset = 'av2'\n        else:\n            dataset = 'nusc'\n        if self.new_split:\n            tmp_file = f'./tmp_gts_{dataset}_{roi_size[0]}x{roi_size[1]}_newsplit.pkl'\n        else:\n            tmp_file = f'./tmp_gts_{dataset}_{roi_size[0]}x{roi_size[1]}.pkl'\n        if os.path.exists(tmp_file):\n            print(f'loading cached gts from {tmp_file}')\n            gts = mmcv.load(tmp_file)\n            return gts\n        \n        print('collecting gts...')\n        gts = {}\n        self.dataloader = build_dataloader(\n            self.dataset, samples_per_gpu=1, workers_per_gpu=self.n_workers, shuffle=False, dist=False)\n        pbar = mmcv.ProgressBar(len(self.dataloader))\n        for data in self.dataloader:\n            token = deepcopy(data['img_metas'].data[0][0]['token'])\n            gt = deepcopy(data['vectors'].data[0][0])\n            gts[token] = gt\n            pbar.update()\n            del data # avoid dataloader memory crash\n        \n        if not os.path.exists(tmp_file):\n            print(f\"saving gt to {tmp_file}\")\n            mmcv.dump(gts, tmp_file)\n        return gts\n    \n    def interp_fixed_num(self, \n                         vector: NDArray, \n                         num_pts: int) -> NDArray:\n        ''' Interpolate a polyline.\n        \n        Args:\n            vector (array): line coordinates, shape (M, 2)\n            num_pts (int): \n        \n        Returns:\n            sampled_points (array): interpolated coordinates\n        '''\n        line = LineString(vector)\n        distances = np.linspace(0, line.length, num_pts)\n        sampled_points = np.array([list(line.interpolate(distance).coords) \n            for distance in distances]).squeeze()\n        \n        return sampled_points\n    \n    def interp_fixed_dist(self, \n                          vector: NDArray,\n                    
      sample_dist: float) -> NDArray:\n        ''' Interpolate a line at fixed interval.\n        \n        Args:\n            vector (LineString): vector\n            sample_dist (float): sample interval\n        \n        Returns:\n            points (array): interpolated points, shape (N, 2)\n        '''\n        line = LineString(vector)\n        distances = list(np.arange(sample_dist, line.length, sample_dist))\n        # make sure to sample at least two points when sample_dist > line.length\n        distances = [0,] + distances + [line.length,] \n        \n        sampled_points = np.array([list(line.interpolate(distance).coords)\n                                for distance in distances]).squeeze()\n        \n        return sampled_points\n\n    def _evaluate_single(self, \n                         pred_vectors: List, \n                         scores: List, \n                         groundtruth: List, \n                         thresholds: List, \n                         metric: str='metric') -> Dict[int, NDArray]:\n        ''' Do single-frame matching for one class.\n        \n        Args:\n            pred_vectors (List): List[vector(ndarray) (different length)], \n            scores (List): List[score(float)]\n            groundtruth (List): List of vectors\n            thresholds (List): List of thresholds\n        \n        Returns:\n            tp_fp_score_by_thr (Dict): matching results at different thresholds\n                e.g. {0.5: (M, 2), 1.0: (M, 2), 1.5: (M, 2)}\n        '''\n\n        pred_lines = []\n\n        # interpolate predictions\n        for vector in pred_vectors:\n            vector = np.array(vector)\n            vector_interp = self.interp_fixed_num(vector, INTERP_NUM)\n            pred_lines.append(vector_interp)\n        if pred_lines:\n            pred_lines = np.stack(pred_lines)\n        else:\n            pred_lines = np.zeros((0, INTERP_NUM, 2))\n\n        # interpolate groundtruth\n        gt_lines = []\n        for vector in groundtruth:\n            vector_interp = self.interp_fixed_num(vector, INTERP_NUM)\n            gt_lines.append(vector_interp)\n        if gt_lines:\n            gt_lines = np.stack(gt_lines)\n        else:\n            gt_lines = np.zeros((0, INTERP_NUM, 2))\n        \n        scores = np.array(scores)\n        tp_fp_list = instance_match(pred_lines, scores, gt_lines, thresholds, metric) # (M, 2)\n        tp_fp_score_by_thr = {}\n        for i, thr in enumerate(thresholds):\n            tp, fp = tp_fp_list[i]\n            tp_fp_score = np.hstack([tp[:, None], fp[:, None], scores[:, None]])\n            tp_fp_score_by_thr[thr] = tp_fp_score\n        \n        return tp_fp_score_by_thr # {0.5: (M, 2), 1.0: (M, 2), 1.5: (M, 2)}\n        \n    def evaluate(self, \n                 result_path: str, \n                 metric: str='chamfer', \n                 logger: Optional[Logger]=None) -> Dict[str, float]:\n        ''' Do evaluation for a submission file and print evalution results to `logger` if specified.\n        The submission will be aligned by tokens before evaluation. We use multi-worker to speed up.\n        \n        Args:\n            result_path (str): path to submission file\n            metric (str): distance metric. Default: 'chamfer'\n            logger (Logger): logger to print evaluation result, Default: None\n        \n        Returns:\n            new_result_dict (Dict): evaluation results. 
AP by categories.\n        '''\n        \n        results = mmcv.load(result_path)\n        results = results['results']\n        \n        # re-group samples and gt by label\n        samples_by_cls = {label: [] for label in self.id2cat.keys()}\n        num_gts = {label: 0 for label in self.id2cat.keys()}\n        num_preds = {label: 0 for label in self.id2cat.keys()}\n\n        # align by token\n        for token, gt in self.gts.items():\n            if token in results.keys():\n                pred = results[token]\n            else:\n                pred = {'vectors': [], 'scores': [], 'labels': []}\n            \n            # for every sample\n            vectors_by_cls = {label: [] for label in self.id2cat.keys()}\n            scores_by_cls = {label: [] for label in self.id2cat.keys()}\n\n            for i in range(len(pred['labels'])):\n                # i-th pred line in sample\n                label = pred['labels'][i]\n                vector = pred['vectors'][i]\n                score = pred['scores'][i]\n\n                vectors_by_cls[label].append(vector)\n                scores_by_cls[label].append(score)\n\n            for label in self.id2cat.keys():\n                new_sample = (vectors_by_cls[label], scores_by_cls[label], gt[label])\n                num_gts[label] += len(gt[label])\n                num_preds[label] += len(scores_by_cls[label])\n                samples_by_cls[label].append(new_sample)\n\n        result_dict = {}\n\n        print(f'\\nevaluating {len(self.id2cat)} categories...')\n        start = time()\n        if self.n_workers > 0:\n            pool = Pool(self.n_workers)\n        \n        sum_mAP = 0\n        pbar = mmcv.ProgressBar(len(self.id2cat))\n        for label in self.id2cat.keys():\n            samples = samples_by_cls[label] # List[(pred_lines, scores, gts)]\n            result_dict[self.id2cat[label]] = {\n                'num_gts': num_gts[label],\n                'num_preds': num_preds[label]\n            }\n            sum_AP = 0\n\n            fn = partial(self._evaluate_single, thresholds=self.thresholds, metric=metric)\n            if self.n_workers > 0:\n                tpfp_score_list = pool.starmap(fn, samples)\n            else:\n                tpfp_score_list = []\n                for sample in samples:\n                    tpfp_score_list.append(fn(*sample))\n            \n            for thr in self.thresholds:\n                tp_fp_score = [i[thr] for i in tpfp_score_list]\n                tp_fp_score = np.vstack(tp_fp_score) # (num_dets, 3)\n                sort_inds = np.argsort(-tp_fp_score[:, -1])\n\n                tp = tp_fp_score[sort_inds, 0] # (num_dets,)\n                fp = tp_fp_score[sort_inds, 1] # (num_dets,)\n                tp = np.cumsum(tp, axis=0)\n                fp = np.cumsum(fp, axis=0)\n                eps = np.finfo(np.float32).eps\n                recalls = tp / np.maximum(num_gts[label], eps)\n                precisions = tp / np.maximum((tp + fp), eps)\n\n                AP = average_precision(recalls, precisions, 'area')\n                sum_AP += AP\n                result_dict[self.id2cat[label]].update({f'AP@{thr}': AP})\n\n            pbar.update()\n            \n            AP = sum_AP / len(self.thresholds)\n            sum_mAP += AP\n\n            result_dict[self.id2cat[label]].update({f'AP': AP})\n        \n        if self.n_workers > 0:\n            pool.close()\n        \n        mAP = sum_mAP / len(self.id2cat.keys())\n        result_dict.update({'mAP': mAP})\n        \n        
print(f\"finished in {time() - start:.2f}s\")\n\n        # print results\n        table = prettytable.PrettyTable(['category', 'num_preds', 'num_gts'] + \n                [f'AP@{thr}' for thr in self.thresholds] + ['AP'])\n        for label in self.id2cat.keys():\n            table.add_row([\n                self.id2cat[label], \n                result_dict[self.id2cat[label]]['num_preds'],\n                result_dict[self.id2cat[label]]['num_gts'],\n                *[round(result_dict[self.id2cat[label]][f'AP@{thr}'], 4) for thr in self.thresholds],\n                round(result_dict[self.id2cat[label]]['AP'], 4),\n            ])\n        \n        from mmcv.utils import print_log\n        print_log('\\n'+str(table), logger=logger)\n        mAP_normal = 0\n        for label in self.id2cat.keys():\n            for thr in self.thresholds:\n                mAP_normal += result_dict[self.id2cat[label]][f'AP@{thr}']\n        mAP_normal = mAP_normal / 9\n\n        print_log(f'mAP_normal = {mAP_normal:.4f}\\n', logger=logger)\n        # print_log(f'mAP_hard = {mAP_easy:.4f}\\n', logger=logger)\n\n        new_result_dict = {}\n        for name in self.cat2id:\n            new_result_dict[name] = result_dict[name]['AP']\n\n        return new_result_dict"
  },
  {
    "path": "mmdet3d/datasets/kitti2d_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\n\nfrom mmdet.datasets import CustomDataset\nfrom .builder import DATASETS\n\n\n@DATASETS.register_module()\nclass Kitti2DDataset(CustomDataset):\n    r\"\"\"KITTI 2D Dataset.\n\n    This class serves as the API for experiments on the `KITTI Dataset\n    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR'. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n\n    CLASSES = ('car', 'pedestrian', 'cyclist')\n    \"\"\"\n    Annotation format:\n    [\n        {\n            'image': {\n                'image_idx': 0,\n                'image_path': 'training/image_2/000000.png',\n                'image_shape': array([ 370, 1224], dtype=int32)\n            },\n            'point_cloud': {\n                 'num_features': 4,\n                 'velodyne_path': 'training/velodyne/000000.bin'\n             },\n             'calib': {\n                 'P0': <np.ndarray> (4, 4),\n                 'P1': <np.ndarray> (4, 4),\n                 'P2': <np.ndarray> (4, 4),\n                 'P3': <np.ndarray> (4, 4),\n                 'R0_rect':4x4 np.array,\n                 'Tr_velo_to_cam': 4x4 np.array,\n                 'Tr_imu_to_velo': 4x4 np.array\n             },\n             'annos': {\n                 'name': <np.ndarray> (n),\n                 'truncated': <np.ndarray> (n),\n                 'occluded': <np.ndarray> (n),\n                 'alpha': <np.ndarray> (n),\n                 'bbox': <np.ndarray> (n, 4),\n                 'dimensions': <np.ndarray> (n, 3),\n                 'location': <np.ndarray> (n, 3),\n                 'rotation_y': <np.ndarray> (n),\n                 'score': <np.ndarray> (n),\n                 'index': array([0], dtype=int32),\n                 'group_ids': array([0], dtype=int32),\n                 'difficulty': array([0], dtype=int32),\n                 'num_points_in_gt': <np.ndarray> (n),\n             }\n        }\n    ]\n    \"\"\"\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        self.data_infos = mmcv.load(ann_file)\n        self.cat2label = {\n            cat_name: i\n            for i, cat_name in enumerate(self.CLASSES)\n        }\n        return self.data_infos\n\n    def 
_filter_imgs(self, min_size=32):\n        \"\"\"Filter images without ground truths.\"\"\"\n        valid_inds = []\n        for i, img_info in enumerate(self.data_infos):\n            if len(img_info['annos']['name']) > 0:\n                valid_inds.append(i)\n        return valid_inds\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the following keys:\n\n                - bboxes (np.ndarray): Ground truth bboxes.\n                - labels (np.ndarray): Labels of ground truths.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        annos = info['annos']\n        gt_names = annos['name']\n        gt_bboxes = annos['bbox']\n        difficulty = annos['difficulty']\n\n        # remove classes that is not needed\n        selected = self.keep_arrays_by_name(gt_names, self.CLASSES)\n        gt_bboxes = gt_bboxes[selected]\n        gt_names = gt_names[selected]\n        difficulty = difficulty[selected]\n        gt_labels = np.array([self.cat2label[n] for n in gt_names])\n\n        anns_results = dict(\n            bboxes=gt_bboxes.astype(np.float32),\n            labels=gt_labels,\n        )\n        return anns_results\n\n    def prepare_train_img(self, idx):\n        \"\"\"Training image preparation.\n\n        Args:\n            index (int): Index for accessing the target image data.\n\n        Returns:\n            dict: Training image data dict after preprocessing\n                corresponding to the index.\n        \"\"\"\n        img_raw_info = self.data_infos[idx]['image']\n        img_info = dict(filename=img_raw_info['image_path'])\n        ann_info = self.get_ann_info(idx)\n        if len(ann_info['bboxes']) == 0:\n            return None\n        results = dict(img_info=img_info, ann_info=ann_info)\n        if self.proposals is not None:\n            results['proposals'] = self.proposals[idx]\n        self.pre_pipeline(results)\n        return self.pipeline(results)\n\n    def prepare_test_img(self, idx):\n        \"\"\"Prepare data for testing.\n\n        Args:\n            index (int): Index for accessing the target image data.\n\n        Returns:\n            dict: Testing image data dict after preprocessing\n                corresponding to the index.\n        \"\"\"\n        img_raw_info = self.data_infos[idx]['image']\n        img_info = dict(filename=img_raw_info['image_path'])\n        results = dict(img_info=img_info)\n        if self.proposals is not None:\n            results['proposals'] = self.proposals[idx]\n        self.pre_pipeline(results)\n        return self.pipeline(results)\n\n    def drop_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Drop irrelevant ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be dropped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x not in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def keep_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Keep useful ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            
used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be keeped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def reformat_bbox(self, outputs, out=None):\n        \"\"\"Reformat bounding boxes to KITTI 2D styles.\n\n        Args:\n            outputs (list[np.ndarray]): List of arrays storing the inferenced\n                bounding boxes and scores.\n            out (str, optional): The prefix of output file.\n                Default: None.\n\n        Returns:\n            list[dict]: A list of dictionaries with the kitti 2D format.\n        \"\"\"\n        from mmdet3d.core.bbox.transforms import bbox2result_kitti2d\n        sample_idx = [info['image']['image_idx'] for info in self.data_infos]\n        result_files = bbox2result_kitti2d(outputs, self.CLASSES, sample_idx,\n                                           out)\n        return result_files\n\n    def evaluate(self, result_files, eval_types=None):\n        \"\"\"Evaluation in KITTI protocol.\n\n        Args:\n            result_files (str): Path of result files.\n            eval_types (str, optional): Types of evaluation. Default: None.\n                KITTI dataset only support 'bbox' evaluation type.\n\n        Returns:\n            tuple (str, dict): Average precision results in str format\n                and average precision results in dict format.\n        \"\"\"\n        from mmdet3d.core.evaluation import kitti_eval\n        eval_types = ['bbox'] if not eval_types else eval_types\n        assert eval_types in ('bbox', ['bbox'\n                                       ]), 'KITTI data set only evaluate bbox'\n        gt_annos = [info['annos'] for info in self.data_infos]\n        ap_result_str, ap_dict = kitti_eval(\n            gt_annos, result_files, self.CLASSES, eval_types=['bbox'])\n        return ap_result_str, ap_dict\n"
  },
  {
    "path": "mmdet3d/datasets/kitti_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport os\nimport tempfile\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.utils import print_log\n\nfrom ..core import show_multi_modality_result, show_result\nfrom ..core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,\n                         LiDARInstance3DBoxes, points_cam2img)\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\nfrom .pipelines import Compose\n\n\n@DATASETS.register_module()\nclass KittiDataset(Custom3DDataset):\n    r\"\"\"KITTI Dataset.\n\n    This class serves as the API for experiments on the `KITTI Dataset\n    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        split (str): Split of input data.\n        pts_prefix (str, optional): Prefix of points files.\n            Defaults to 'velodyne'.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        pcd_limit_range (list, optional): The range of point cloud used to\n            filter invalid predicted boxes.\n            Default: [0, -40, -3, 70.4, 40, 0.0].\n    \"\"\"\n    CLASSES = ('car', 'pedestrian', 'cyclist')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 split,\n                 pts_prefix='velodyne',\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0],\n                 **kwargs):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            **kwargs)\n\n        self.split = split\n        self.root_split = os.path.join(self.data_root, split)\n        assert self.modality is not None\n        self.pcd_limit_range = pcd_limit_range\n        self.pts_prefix = pts_prefix\n\n    def _get_pts_filename(self, idx):\n        \"\"\"Get point cloud filename according to the given index.\n\n        Args:\n            index (int): Index of the point cloud file to get.\n\n        Returns:\n            str: Name of the point cloud file.\n        \"\"\"\n      
  pts_filename = osp.join(self.root_split, self.pts_prefix,\n                                f'{idx:06d}.bin')\n        return pts_filename\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - img_prefix (str): Prefix of image files.\n                - img_info (dict): Image info.\n                - lidar2img (list[np.ndarray], optional): Transformations\n                    from lidar to different cameras.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['image']['image_idx']\n        img_filename = os.path.join(self.data_root,\n                                    info['image']['image_path'])\n\n        # TODO: consider use torch.Tensor only\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P2 = info['calib']['P2'].astype(np.float32)\n        lidar2img = P2 @ rect @ Trv2c\n\n        pts_filename = self._get_pts_filename(sample_idx)\n        input_dict = dict(\n            sample_idx=sample_idx,\n            pts_filename=pts_filename,\n            img_prefix=None,\n            img_info=dict(filename=img_filename),\n            lidar2img=lidar2img)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):\n                    3D ground truth bboxes.\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_bboxes (np.ndarray): 2D ground truth bboxes.\n                - gt_labels (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n                - difficulty (int): Difficulty defined by KITTI.\n                    0, 1, 2 represent xxxxx respectively.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n\n        if 'plane' in info:\n            # convert ground plane to velodyne coordinates\n            reverse = np.linalg.inv(rect @ Trv2c)\n\n            (plane_norm_cam,\n             plane_off_cam) = (info['plane'][:3],\n                               -info['plane'][:3] * info['plane'][3])\n            plane_norm_lidar = \\\n                (reverse[:3, :3] @ plane_norm_cam[:, None])[:, 0]\n            plane_off_lidar = (\n                reverse[:3, :3] @ plane_off_cam[:, None][:, 0] +\n                reverse[:3, 3])\n            plane_lidar = np.zeros_like(plane_norm_lidar, shape=(4, ))\n            plane_lidar[:3] = plane_norm_lidar\n            plane_lidar[3] = 
-plane_norm_lidar.T @ plane_off_lidar\n        else:\n            plane_lidar = None\n\n        difficulty = info['annos']['difficulty']\n        annos = info['annos']\n        # we need other objects to avoid collision when sample\n        annos = self.remove_dontcare(annos)\n        loc = annos['location']\n        dims = annos['dimensions']\n        rots = annos['rotation_y']\n        gt_names = annos['name']\n        gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1).astype(np.float32)\n\n        # convert gt_bboxes_3d to velodyne coordinates\n        gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(\n            self.box_mode_3d, np.linalg.inv(rect @ Trv2c))\n        gt_bboxes = annos['bbox']\n\n        selected = self.drop_arrays_by_name(gt_names, ['DontCare'])\n        gt_bboxes = gt_bboxes[selected].astype('float32')\n        gt_names = gt_names[selected]\n\n        gt_labels = []\n        for cat in gt_names:\n            if cat in self.CLASSES:\n                gt_labels.append(self.CLASSES.index(cat))\n            else:\n                gt_labels.append(-1)\n        gt_labels = np.array(gt_labels).astype(np.int64)\n        gt_labels_3d = copy.deepcopy(gt_labels)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            bboxes=gt_bboxes,\n            labels=gt_labels,\n            gt_names=gt_names,\n            plane=plane_lidar,\n            difficulty=difficulty)\n        return anns_results\n\n    def drop_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Drop irrelevant ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be dropped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x not in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def keep_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Keep useful ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be keeped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def remove_dontcare(self, ann_info):\n        \"\"\"Remove annotations that do not need to be cared.\n\n        Args:\n            ann_info (dict): Dict of annotation infos. 
The ``'DontCare'``\n                annotations will be removed according to ann_file['name'].\n\n        Returns:\n            dict: Annotations after filtering.\n        \"\"\"\n        img_filtered_annotations = {}\n        relevant_annotation_indices = [\n            i for i, x in enumerate(ann_info['name']) if x != 'DontCare'\n        ]\n        for key in ann_info.keys():\n            img_filtered_annotations[key] = (\n                ann_info[key][relevant_annotation_indices])\n        return img_filtered_annotations\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str): The prefix of submitted files. It\n                includes the file path and the prefix of filename, e.g.,\n                \"a/b/prefix\". If not specified, a temp file will be created.\n                Default: None.\n\n        Returns:\n            tuple: (result_files, tmp_dir), result_files is a dict containing\n                the json filepaths, tmp_dir is the temporal directory created\n                for saving json files when jsonfile_prefix is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        if not isinstance(outputs[0], dict):\n            result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,\n                                                    pklfile_prefix,\n                                                    submission_prefix)\n        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:\n            result_files = dict()\n            for name in outputs[0]:\n                results_ = [out[name] for out in outputs]\n                pklfile_prefix_ = pklfile_prefix + name\n                if submission_prefix is not None:\n                    submission_prefix_ = submission_prefix + name\n                else:\n                    submission_prefix_ = None\n                if 'img' in name:\n                    result_files = self.bbox2result_kitti2d(\n                        results_, self.CLASSES, pklfile_prefix_,\n                        submission_prefix_)\n                else:\n                    result_files_ = self.bbox2result_kitti(\n                        results_, self.CLASSES, pklfile_prefix_,\n                        submission_prefix_)\n                result_files[name] = result_files_\n        else:\n            result_files = self.bbox2result_kitti(outputs, self.CLASSES,\n                                                  pklfile_prefix,\n                                                  submission_prefix)\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 logger=None,\n                 pklfile_prefix=None,\n                 submission_prefix=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in KITTI protocol.\n\n        
Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Default: None.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            pklfile_prefix (str, optional): The prefix of pkl files, including\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str, optional): The prefix of submission data.\n                If not specified, the submission data will not be generated.\n                Default: None.\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, pklfile_prefix)\n        from mmdet3d.core.evaluation import kitti_eval\n        gt_annos = [info['annos'] for info in self.data_infos]\n\n        if isinstance(result_files, dict):\n            ap_dict = dict()\n            for name, result_files_ in result_files.items():\n                eval_types = ['bbox', 'bev', '3d']\n                if 'img' in name:\n                    eval_types = ['bbox']\n                ap_result_str, ap_dict_ = kitti_eval(\n                    gt_annos,\n                    result_files_,\n                    self.CLASSES,\n                    eval_types=eval_types)\n                for ap_type, ap in ap_dict_.items():\n                    ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))\n\n                print_log(\n                    f'Results of {name}:\\n' + ap_result_str, logger=logger)\n\n        else:\n            if metric == 'img_bbox':\n                ap_result_str, ap_dict = kitti_eval(\n                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])\n            else:\n                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,\n                                                    self.CLASSES)\n            print_log('\\n' + ap_result_str, logger=logger)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n        if show or out_dir:\n            self.show(results, out_dir, show=show, pipeline=pipeline)\n        return ap_dict\n\n    def bbox2result_kitti(self,\n                          net_outputs,\n                          class_names,\n                          pklfile_prefix=None,\n                          submission_prefix=None):\n        \"\"\"Convert 3D detection results to kitti format for evaluation and test\n        submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of array storing the\n                inferenced bounding boxes and scores.\n            class_names (list[String]): A list of class names.\n            pklfile_prefix (str): The prefix of pkl file.\n            submission_prefix (str): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries with the kitti format.\n        \"\"\"\n        assert len(net_outputs) == len(self.data_infos), \\\n            'invalid list length of network 
outputs'\n        if submission_prefix is not None:\n            mmcv.mkdir_or_exist(submission_prefix)\n\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for idx, pred_dicts in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            info = self.data_infos[idx]\n            sample_idx = info['image']['image_idx']\n            image_shape = info['image']['image_shape'][:2]\n            box_dict = self.convert_valid_bboxes(pred_dicts, info)\n            anno = {\n                'name': [],\n                'truncated': [],\n                'occluded': [],\n                'alpha': [],\n                'bbox': [],\n                'dimensions': [],\n                'location': [],\n                'rotation_y': [],\n                'score': []\n            }\n            if len(box_dict['bbox']) > 0:\n                box_2d_preds = box_dict['bbox']\n                box_preds = box_dict['box3d_camera']\n                scores = box_dict['scores']\n                box_preds_lidar = box_dict['box3d_lidar']\n                label_preds = box_dict['label_preds']\n\n                for box, box_lidar, bbox, score, label in zip(\n                        box_preds, box_preds_lidar, box_2d_preds, scores,\n                        label_preds):\n                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])\n                    bbox[:2] = np.maximum(bbox[:2], [0, 0])\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(\n                        -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])\n                    anno['bbox'].append(bbox)\n                    anno['dimensions'].append(box[3:6])\n                    anno['location'].append(box[:3])\n                    anno['rotation_y'].append(box[6])\n                    anno['score'].append(score)\n\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n            else:\n                anno = {\n                    'name': np.array([]),\n                    'truncated': np.array([]),\n                    'occluded': np.array([]),\n                    'alpha': np.array([]),\n                    'bbox': np.zeros([0, 4]),\n                    'dimensions': np.zeros([0, 3]),\n                    'location': np.zeros([0, 3]),\n                    'rotation_y': np.array([]),\n                    'score': np.array([]),\n                }\n                annos.append(anno)\n\n            if submission_prefix is not None:\n                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'\n                with open(curr_file, 'w') as f:\n                    bbox = anno['bbox']\n                    loc = anno['location']\n                    dims = anno['dimensions']  # lhw -> hwl\n\n                    for idx in range(len(bbox)):\n                        print(\n                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '\n                            '{:.4f} {:.4f} {:.4f} '\n                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(\n                                anno['name'][idx], anno['alpha'][idx],\n                                bbox[idx][0], bbox[idx][1], bbox[idx][2],\n                                bbox[idx][3], dims[idx][1], dims[idx][2],\n                                dims[idx][0], loc[idx][0], loc[idx][1],\n  
                              loc[idx][2], anno['rotation_y'][idx],\n                                anno['score'][idx]),\n                            file=f)\n\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)\n\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            if not pklfile_prefix.endswith(('.pkl', '.pickle')):\n                out = f'{pklfile_prefix}.pkl'\n            else:\n                out = pklfile_prefix\n            mmcv.dump(det_annos, out)\n            print(f'Result is saved to {out}.')\n\n        return det_annos\n\n    def bbox2result_kitti2d(self,\n                            net_outputs,\n                            class_names,\n                            pklfile_prefix=None,\n                            submission_prefix=None):\n        \"\"\"Convert 2D detection results to kitti format for evaluation and test\n        submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of array storing the\n                inferenced bounding boxes and scores.\n            class_names (list[String]): A list of class names.\n            pklfile_prefix (str): The prefix of pkl file.\n            submission_prefix (str): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries with the kitti format.\n        \"\"\"\n        assert len(net_outputs) == len(self.data_infos), \\\n            'invalid list length of network outputs'\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for i, bboxes_per_sample in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            anno = dict(\n                name=[],\n                truncated=[],\n                occluded=[],\n                alpha=[],\n                bbox=[],\n                dimensions=[],\n                location=[],\n                rotation_y=[],\n                score=[])\n            sample_idx = self.data_infos[i]['image']['image_idx']\n\n            num_example = 0\n            for label in range(len(bboxes_per_sample)):\n                bbox = bboxes_per_sample[label]\n                for i in range(bbox.shape[0]):\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(0.0)\n                    anno['bbox'].append(bbox[i, :4])\n                    # set dimensions (height, width, length) to zero\n                    anno['dimensions'].append(\n                        np.zeros(shape=[3], dtype=np.float32))\n                    # set the 3D translation to (-1000, -1000, -1000)\n                    anno['location'].append(\n                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))\n                    anno['rotation_y'].append(0.0)\n                    anno['score'].append(bbox[i, 4])\n                    num_example += 1\n\n            if num_example == 0:\n                annos.append(\n                    dict(\n                        name=np.array([]),\n                        truncated=np.array([]),\n                        occluded=np.array([]),\n                        alpha=np.array([]),\n                        bbox=np.zeros([0, 4]),\n                        dimensions=np.zeros([0, 3]),\n                        location=np.zeros([0, 3]),\n                        rotation_y=np.array([]),\n                        score=np.array([]),\n                
    ))\n            else:\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * num_example, dtype=np.int64)\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            # save file in pkl format\n            pklfile_path = (\n                pklfile_prefix[:-4] if pklfile_prefix.endswith(\n                    ('.pkl', '.pickle')) else pklfile_prefix)\n            mmcv.dump(det_annos, pklfile_path)\n\n        if submission_prefix is not None:\n            # save file in submission format\n            mmcv.mkdir_or_exist(submission_prefix)\n            print(f'Saving KITTI submission to {submission_prefix}')\n            for i, anno in enumerate(det_annos):\n                sample_idx = self.data_infos[i]['image']['image_idx']\n                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'\n                with open(cur_det_file, 'w') as f:\n                    bbox = anno['bbox']\n                    loc = anno['location']\n                    dims = anno['dimensions'][::-1]  # lhw -> hwl\n                    for idx in range(len(bbox)):\n                        print(\n                            '{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '\n                            '{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(\n                                anno['name'][idx],\n                                anno['alpha'][idx],\n                                *bbox[idx],  # 4 float\n                                *dims[idx],  # 3 float\n                                *loc[idx],  # 3 float\n                                anno['rotation_y'][idx],\n                                anno['score'][idx]),\n                            file=f,\n                        )\n            print(f'Result is saved to {submission_prefix}')\n\n        return det_annos\n\n    def convert_valid_bboxes(self, box_dict, info):\n        \"\"\"Convert the predicted boxes into valid ones.\n\n        Args:\n            box_dict (dict): Box dictionaries to be converted.\n\n                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.\n                - scores_3d (torch.Tensor): Scores of boxes.\n                - labels_3d (torch.Tensor): Class labels of boxes.\n            info (dict): Data info.\n\n        Returns:\n            dict: Valid predicted boxes.\n\n                - bbox (np.ndarray): 2D bounding boxes.\n                - box3d_camera (np.ndarray): 3D bounding boxes in\n                    camera coordinate.\n                - box3d_lidar (np.ndarray): 3D bounding boxes in\n                    LiDAR coordinate.\n                - scores (np.ndarray): Scores of boxes.\n                - label_preds (np.ndarray): Class label predictions.\n                - sample_idx (int): Sample index.\n        \"\"\"\n        # TODO: refactor this function\n        box_preds = box_dict['boxes_3d']\n        scores = box_dict['scores_3d']\n        labels = box_dict['labels_3d']\n        sample_idx = info['image']['image_idx']\n        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)\n\n        if len(box_preds) == 0:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n\n        rect = 
info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P2 = info['calib']['P2'].astype(np.float32)\n        img_shape = info['image']['image_shape']\n        P2 = box_preds.tensor.new_tensor(P2)\n\n        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)\n\n        box_corners = box_preds_camera.corners\n        box_corners_in_image = points_cam2img(box_corners, P2)\n        # box_corners_in_image: [N, 8, 2]\n        minxy = torch.min(box_corners_in_image, dim=1)[0]\n        maxxy = torch.max(box_corners_in_image, dim=1)[0]\n        box_2d_preds = torch.cat([minxy, maxxy], dim=1)\n        # Post-processing\n        # check box_preds_camera\n        image_shape = box_preds.tensor.new_tensor(img_shape)\n        valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &\n                          (box_2d_preds[:, 1] < image_shape[0]) &\n                          (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))\n        # check box_preds\n        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)\n        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &\n                          (box_preds.center < limit_range[3:]))\n        valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)\n\n        if valid_inds.sum() > 0:\n            return dict(\n                bbox=box_2d_preds[valid_inds, :].numpy(),\n                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),\n                box3d_lidar=box_preds[valid_inds].tensor.numpy(),\n                scores=scores[valid_inds].numpy(),\n                label_preds=labels[valid_inds].numpy(),\n                sample_idx=sample_idx)\n        else:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=4,\n                use_dim=4,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ]\n        if self.modality['use_camera']:\n            pipeline.insert(0, dict(type='LoadImageFromFile'))\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=True, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Whether to visualize the results online.\n                Default: False.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            if 'pts_bbox' in result.keys():\n                result = result['pts_bbox']\n            data_info = self.data_infos[i]\n            pts_path = data_info['point_cloud']['velodyne_path']\n  
          file_name = osp.split(pts_path)[-1].split('.')[0]\n            points, img_metas, img = self._extract_data(\n                i, pipeline, ['points', 'img_metas', 'img'])\n            points = points.numpy()\n            # for now we convert points into depth mode\n            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                               Coord3DMode.DEPTH)\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()\n            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,\n                                               Box3DMode.DEPTH)\n            pred_bboxes = result['boxes_3d'].tensor.numpy()\n            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,\n                                                 Box3DMode.DEPTH)\n            show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,\n                        file_name, show)\n\n            # multi-modality visualization\n            if self.modality['use_camera'] and 'lidar2img' in img_metas.keys():\n                img = img.numpy()\n                # need to transpose channel to first dim\n                img = img.transpose(1, 2, 0)\n                show_pred_bboxes = LiDARInstance3DBoxes(\n                    pred_bboxes, origin=(0.5, 0.5, 0))\n                show_gt_bboxes = LiDARInstance3DBoxes(\n                    gt_bboxes, origin=(0.5, 0.5, 0))\n                show_multi_modality_result(\n                    img,\n                    show_gt_bboxes,\n                    show_pred_bboxes,\n                    img_metas['lidar2img'],\n                    out_dir,\n                    file_name,\n                    box_mode='lidar',\n                    show=show)\n"
  },
  {
    "path": "mmdet3d/datasets/kitti_mono_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport tempfile\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.utils import print_log\n\nfrom ..core.bbox import Box3DMode, CameraInstance3DBoxes, points_cam2img\nfrom .builder import DATASETS\nfrom .nuscenes_mono_dataset import NuScenesMonoDataset\n\n\n@DATASETS.register_module()\nclass KittiMonoDataset(NuScenesMonoDataset):\n    \"\"\"Monocular 3D detection on KITTI Dataset.\n\n    Args:\n        data_root (str): Path of dataset root.\n        info_file (str): Path of info file.\n        load_interval (int, optional): Interval of loading the dataset. It is\n            used to uniformly sample the dataset. Defaults to 1.\n        with_velocity (bool, optional): Whether include velocity prediction\n            into the experiments. Defaults to False.\n        eval_version (str, optional): Configuration version of evaluation.\n            Defaults to None.\n        version (str, optional): Dataset version. Defaults to None.\n        kwargs (dict): Other arguments are the same of NuScenesMonoDataset.\n    \"\"\"\n\n    CLASSES = ('Pedestrian', 'Cyclist', 'Car')\n\n    def __init__(self,\n                 data_root,\n                 info_file,\n                 ann_file,\n                 pipeline,\n                 load_interval=1,\n                 with_velocity=False,\n                 eval_version=None,\n                 version=None,\n                 **kwargs):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            load_interval=load_interval,\n            with_velocity=with_velocity,\n            eval_version=eval_version,\n            version=version,\n            **kwargs)\n        self.anno_infos = mmcv.load(info_file)\n        self.bbox_code_size = 7\n\n    def _parse_ann_info(self, img_info, ann_info):\n        \"\"\"Parse bbox and mask annotation.\n\n        Args:\n            ann_info (list[dict]): Annotation info of an image.\n            with_mask (bool): Whether to parse mask annotations.\n\n        Returns:\n            dict: A dict containing the following keys: bboxes, bboxes_ignore,\n                labels, masks, seg_map. 
\"masks\" are raw annotations and not\n                decoded into binary masks.\n        \"\"\"\n        gt_bboxes = []\n        gt_labels = []\n        gt_bboxes_ignore = []\n        gt_masks_ann = []\n        gt_bboxes_cam3d = []\n        centers2d = []\n        depths = []\n        for i, ann in enumerate(ann_info):\n            if ann.get('ignore', False):\n                continue\n            x1, y1, w, h = ann['bbox']\n            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))\n            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))\n            if inter_w * inter_h == 0:\n                continue\n            if ann['area'] <= 0 or w < 1 or h < 1:\n                continue\n            if ann['category_id'] not in self.cat_ids:\n                continue\n            bbox = [x1, y1, x1 + w, y1 + h]\n            if ann.get('iscrowd', False):\n                gt_bboxes_ignore.append(bbox)\n            else:\n                gt_bboxes.append(bbox)\n                gt_labels.append(self.cat2label[ann['category_id']])\n                gt_masks_ann.append(ann.get('segmentation', None))\n                # 3D annotations in camera coordinates\n                bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(-1, )\n                gt_bboxes_cam3d.append(bbox_cam3d)\n                # 2.5D annotations in camera coordinates\n                center2d = ann['center2d'][:2]\n                depth = ann['center2d'][2]\n                centers2d.append(center2d)\n                depths.append(depth)\n\n        if gt_bboxes:\n            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)\n            gt_labels = np.array(gt_labels, dtype=np.int64)\n        else:\n            gt_bboxes = np.zeros((0, 4), dtype=np.float32)\n            gt_labels = np.array([], dtype=np.int64)\n\n        if gt_bboxes_cam3d:\n            gt_bboxes_cam3d = np.array(gt_bboxes_cam3d, dtype=np.float32)\n            centers2d = np.array(centers2d, dtype=np.float32)\n            depths = np.array(depths, dtype=np.float32)\n        else:\n            gt_bboxes_cam3d = np.zeros((0, self.bbox_code_size),\n                                       dtype=np.float32)\n            centers2d = np.zeros((0, 2), dtype=np.float32)\n            depths = np.zeros((0), dtype=np.float32)\n\n        gt_bboxes_cam3d = CameraInstance3DBoxes(\n            gt_bboxes_cam3d,\n            box_dim=gt_bboxes_cam3d.shape[-1],\n            origin=(0.5, 0.5, 0.5))\n        gt_labels_3d = copy.deepcopy(gt_labels)\n\n        if gt_bboxes_ignore:\n            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)\n        else:\n            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)\n\n        seg_map = img_info['filename'].replace('jpg', 'png')\n\n        ann = dict(\n            bboxes=gt_bboxes,\n            labels=gt_labels,\n            gt_bboxes_3d=gt_bboxes_cam3d,\n            gt_labels_3d=gt_labels_3d,\n            centers2d=centers2d,\n            depths=depths,\n            bboxes_ignore=gt_bboxes_ignore,\n            masks=gt_masks_ann,\n            seg_map=seg_map)\n\n        return ann\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str): The prefix of pkl files. 
It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str): The prefix of submitted files. It\n                includes the file path and the prefix of filename, e.g.,\n                \"a/b/prefix\". If not specified, a temp file will be created.\n                Default: None.\n\n        Returns:\n            tuple: (result_files, tmp_dir), result_files is a dict containing\n                the json filepaths, tmp_dir is the temporal directory created\n                for saving json files when jsonfile_prefix is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        if not isinstance(outputs[0], dict):\n            result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,\n                                                    pklfile_prefix,\n                                                    submission_prefix)\n        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0] or \\\n                'img_bbox2d' in outputs[0]:\n            result_files = dict()\n            for name in outputs[0]:\n                results_ = [out[name] for out in outputs]\n                pklfile_prefix_ = pklfile_prefix + name\n                if submission_prefix is not None:\n                    submission_prefix_ = submission_prefix + name\n                else:\n                    submission_prefix_ = None\n                if '2d' in name:\n                    result_files_ = self.bbox2result_kitti2d(\n                        results_, self.CLASSES, pklfile_prefix_,\n                        submission_prefix_)\n                else:\n                    result_files_ = self.bbox2result_kitti(\n                        results_, self.CLASSES, pklfile_prefix_,\n                        submission_prefix_)\n                result_files[name] = result_files_\n        else:\n            result_files = self.bbox2result_kitti(outputs, self.CLASSES,\n                                                  pklfile_prefix,\n                                                  submission_prefix)\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 logger=None,\n                 pklfile_prefix=None,\n                 submission_prefix=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in KITTI protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Defaults to None.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            pklfile_prefix (str, optional): The prefix of pkl files, including\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n            submission_prefix (str, optional): The prefix of submission data.\n                If not specified, the submission data will not be generated.\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, pklfile_prefix)\n        from mmdet3d.core.evaluation import kitti_eval\n        gt_annos = [info['annos'] for info in self.anno_infos]\n\n        if isinstance(result_files, dict):\n            ap_dict = dict()\n            for name, result_files_ in result_files.items():\n                eval_types = ['bbox', 'bev', '3d']\n                if '2d' in name:\n                    eval_types = ['bbox']\n                ap_result_str, ap_dict_ = kitti_eval(\n                    gt_annos,\n                    result_files_,\n                    self.CLASSES,\n                    eval_types=eval_types)\n                for ap_type, ap in ap_dict_.items():\n                    ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))\n\n                print_log(\n                    f'Results of {name}:\\n' + ap_result_str, logger=logger)\n\n        else:\n            if metric == 'img_bbox2d':\n                ap_result_str, ap_dict = kitti_eval(\n                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])\n            else:\n                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,\n                                                    self.CLASSES)\n            print_log('\\n' + ap_result_str, logger=logger)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n        if show or out_dir:\n            self.show(results, out_dir, show=show, pipeline=pipeline)\n        return ap_dict\n\n    def bbox2result_kitti(self,\n                          net_outputs,\n                          class_names,\n                          pklfile_prefix=None,\n                          submission_prefix=None):\n        \"\"\"Convert 3D detection results to kitti format for evaluation and test\n        submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of array storing the\n                inferenced bounding boxes and scores.\n            class_names (list[String]): A list of class names.\n            pklfile_prefix (str): The prefix of pkl file.\n            submission_prefix (str): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries with the kitti format.\n        \"\"\"\n        assert len(net_outputs) == len(self.anno_infos)\n        if submission_prefix is not None:\n            mmcv.mkdir_or_exist(submission_prefix)\n\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for idx, pred_dicts in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            info = self.anno_infos[idx]\n            sample_idx = info['image']['image_idx']\n            image_shape = info['image']['image_shape'][:2]\n\n            box_dict = self.convert_valid_bboxes(pred_dicts, info)\n            anno = {\n                'name': [],\n                'truncated': [],\n                
'occluded': [],\n                'alpha': [],\n                'bbox': [],\n                'dimensions': [],\n                'location': [],\n                'rotation_y': [],\n                'score': []\n            }\n            if len(box_dict['bbox']) > 0:\n                box_2d_preds = box_dict['bbox']\n                box_preds = box_dict['box3d_camera']\n                scores = box_dict['scores']\n                box_preds_lidar = box_dict['box3d_lidar']\n                label_preds = box_dict['label_preds']\n\n                for box, box_lidar, bbox, score, label in zip(\n                        box_preds, box_preds_lidar, box_2d_preds, scores,\n                        label_preds):\n                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])\n                    bbox[:2] = np.maximum(bbox[:2], [0, 0])\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(-np.arctan2(box[0], box[2]) + box[6])\n                    anno['bbox'].append(bbox)\n                    anno['dimensions'].append(box[3:6])\n                    anno['location'].append(box[:3])\n                    anno['rotation_y'].append(box[6])\n                    anno['score'].append(score)\n\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n\n            else:\n                anno = {\n                    'name': np.array([]),\n                    'truncated': np.array([]),\n                    'occluded': np.array([]),\n                    'alpha': np.array([]),\n                    'bbox': np.zeros([0, 4]),\n                    'dimensions': np.zeros([0, 3]),\n                    'location': np.zeros([0, 3]),\n                    'rotation_y': np.array([]),\n                    'score': np.array([]),\n                }\n                annos.append(anno)\n\n            if submission_prefix is not None:\n                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'\n                with open(curr_file, 'w') as f:\n                    bbox = anno['bbox']\n                    loc = anno['location']\n                    dims = anno['dimensions']  # lhw -> hwl\n\n                    for idx in range(len(bbox)):\n                        print(\n                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '\n                            '{:.4f} {:.4f} {:.4f} '\n                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(\n                                anno['name'][idx], anno['alpha'][idx],\n                                bbox[idx][0], bbox[idx][1], bbox[idx][2],\n                                bbox[idx][3], dims[idx][1], dims[idx][2],\n                                dims[idx][0], loc[idx][0], loc[idx][1],\n                                loc[idx][2], anno['rotation_y'][idx],\n                                anno['score'][idx]),\n                            file=f)\n\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)\n\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            if not pklfile_prefix.endswith(('.pkl', '.pickle')):\n                out = f'{pklfile_prefix}.pkl'\n            mmcv.dump(det_annos, out)\n            print('Result is saved to %s' % out)\n\n        return det_annos\n\n    def bbox2result_kitti2d(self,\n                            net_outputs,\n     
                       class_names,\n                            pklfile_prefix=None,\n                            submission_prefix=None):\n        \"\"\"Convert 2D detection results to kitti format for evaluation and test\n        submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of array storing the\n                inferenced bounding boxes and scores.\n            class_names (list[String]): A list of class names.\n            pklfile_prefix (str): The prefix of pkl file.\n            submission_prefix (str): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries have the kitti format\n        \"\"\"\n        assert len(net_outputs) == len(self.anno_infos)\n\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for i, bboxes_per_sample in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            anno = dict(\n                name=[],\n                truncated=[],\n                occluded=[],\n                alpha=[],\n                bbox=[],\n                dimensions=[],\n                location=[],\n                rotation_y=[],\n                score=[])\n            sample_idx = self.anno_infos[i]['image']['image_idx']\n\n            num_example = 0\n            for label in range(len(bboxes_per_sample)):\n                bbox = bboxes_per_sample[label]\n                for i in range(bbox.shape[0]):\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(-10)\n                    anno['bbox'].append(bbox[i, :4])\n                    # set dimensions (height, width, length) to zero\n                    anno['dimensions'].append(\n                        np.zeros(shape=[3], dtype=np.float32))\n                    # set the 3D translation to (-1000, -1000, -1000)\n                    anno['location'].append(\n                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))\n                    anno['rotation_y'].append(0.0)\n                    anno['score'].append(bbox[i, 4])\n                    num_example += 1\n\n            if num_example == 0:\n                annos.append(\n                    dict(\n                        name=np.array([]),\n                        truncated=np.array([]),\n                        occluded=np.array([]),\n                        alpha=np.array([]),\n                        bbox=np.zeros([0, 4]),\n                        dimensions=np.zeros([0, 3]),\n                        location=np.zeros([0, 3]),\n                        rotation_y=np.array([]),\n                        score=np.array([]),\n                    ))\n            else:\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * num_example, dtype=np.int64)\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            if not pklfile_prefix.endswith(('.pkl', '.pickle')):\n                out = f'{pklfile_prefix}.pkl'\n            mmcv.dump(det_annos, out)\n            print('Result is saved to %s' % out)\n\n        if submission_prefix is not None:\n            # save file in submission format\n            mmcv.mkdir_or_exist(submission_prefix)\n            print(f'Saving KITTI submission to 
{submission_prefix}')\n            for i, anno in enumerate(det_annos):\n                sample_idx = self.anno_infos[i]['image']['image_idx']\n                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'\n                with open(cur_det_file, 'w') as f:\n                    bbox = anno['bbox']\n                    loc = anno['location']\n                    dims = anno['dimensions'][::-1]  # lhw -> hwl\n                    for idx in range(len(bbox)):\n                        print(\n                            '{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '\n                            '{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(\n                                anno['name'][idx],\n                                anno['alpha'][idx],\n                                *bbox[idx],  # 4 float\n                                *dims[idx],  # 3 float\n                                *loc[idx],  # 3 float\n                                anno['rotation_y'][idx],\n                                anno['score'][idx]),\n                            file=f,\n                        )\n            print(f'Result is saved to {submission_prefix}')\n\n        return det_annos\n\n    def convert_valid_bboxes(self, box_dict, info):\n        \"\"\"Convert the predicted boxes into valid ones.\n\n        Args:\n            box_dict (dict): Box dictionaries to be converted.\n                - boxes_3d (:obj:`CameraInstance3DBoxes`): 3D bounding boxes.\n                - scores_3d (torch.Tensor): Scores of boxes.\n                - labels_3d (torch.Tensor): Class labels of boxes.\n            info (dict): Data info.\n\n        Returns:\n            dict: Valid predicted boxes.\n                - bbox (np.ndarray): 2D bounding boxes.\n                - box3d_camera (np.ndarray): 3D bounding boxes in\n                    camera coordinate.\n                - scores (np.ndarray): Scores of boxes.\n                - label_preds (np.ndarray): Class label predictions.\n                - sample_idx (int): Sample index.\n        \"\"\"\n        box_preds = box_dict['boxes_3d']\n        scores = box_dict['scores_3d']\n        labels = box_dict['labels_3d']\n        sample_idx = info['image']['image_idx']\n\n        if len(box_preds) == 0:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P2 = info['calib']['P2'].astype(np.float32)\n        img_shape = info['image']['image_shape']\n        P2 = box_preds.tensor.new_tensor(P2)\n\n        box_preds_camera = box_preds\n        box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR,\n                                               np.linalg.inv(rect @ Trv2c))\n\n        box_corners = box_preds_camera.corners\n        box_corners_in_image = points_cam2img(box_corners, P2)\n        # box_corners_in_image: [N, 8, 2]\n        minxy = torch.min(box_corners_in_image, dim=1)[0]\n        maxxy = torch.max(box_corners_in_image, dim=1)[0]\n        box_2d_preds = torch.cat([minxy, maxxy], dim=1)\n        # Post-processing\n        # check box_preds_camera\n        image_shape = box_preds.tensor.new_tensor(img_shape)\n        valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &\n                          (box_2d_preds[:, 1] < 
image_shape[0]) &\n                          (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))\n        # check box_preds\n        valid_inds = valid_cam_inds\n\n        if valid_inds.sum() > 0:\n            return dict(\n                bbox=box_2d_preds[valid_inds, :].numpy(),\n                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),\n                box3d_lidar=box_preds_lidar[valid_inds].tensor.numpy(),\n                scores=scores[valid_inds].numpy(),\n                label_preds=labels[valid_inds].numpy(),\n                sample_idx=sample_idx)\n        else:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n"
  },
  {
    "path": "mmdet3d/datasets/lyft_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport tempfile\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport pandas as pd\nfrom lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft\nfrom lyft_dataset_sdk.utils.data_classes import Box as LyftBox\nfrom pyquaternion import Quaternion\n\nfrom mmdet3d.core.evaluation.lyft_eval import lyft_eval\nfrom ..core import show_result\nfrom ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\nfrom .pipelines import Compose\n\n\n@DATASETS.register_module()\nclass LyftDataset(Custom3DDataset):\n    r\"\"\"Lyft Dataset.\n\n    This class serves as the API for experiments on the Lyft Dataset.\n\n    Please refer to\n    `<https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/data>`_\n    for data downloading.\n\n    Args:\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        data_root (str): Path of dataset root.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        load_interval (int, optional): Interval of loading the dataset. It is\n            used to uniformly sample the dataset. Defaults to 1.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"  # noqa: E501\n    NameMapping = {\n        'bicycle': 'bicycle',\n        'bus': 'bus',\n        'car': 'car',\n        'emergency_vehicle': 'emergency_vehicle',\n        'motorcycle': 'motorcycle',\n        'other_vehicle': 'other_vehicle',\n        'pedestrian': 'pedestrian',\n        'truck': 'truck',\n        'animal': 'animal'\n    }\n    DefaultAttribute = {\n        'car': 'is_stationary',\n        'truck': 'is_stationary',\n        'bus': 'is_stationary',\n        'emergency_vehicle': 'is_stationary',\n        'other_vehicle': 'is_stationary',\n        'motorcycle': 'is_stationary',\n        'bicycle': 'is_stationary',\n        'pedestrian': 'is_stationary',\n        'animal': 'is_stationary'\n    }\n    CLASSES = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',\n               'motorcycle', 'bicycle', 'pedestrian', 'animal')\n\n    def __init__(self,\n                 ann_file,\n                 pipeline=None,\n                 data_root=None,\n                 classes=None,\n                 load_interval=1,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 **kwargs):\n        self.load_interval = load_interval\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n   
         pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            **kwargs)\n\n        if self.modality is None:\n            self.modality = dict(\n                use_camera=False,\n                use_lidar=True,\n                use_radar=False,\n                use_map=False,\n                use_external=False,\n            )\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations sorted by timestamps.\n        \"\"\"\n        # loading data from a file-like object needs file format\n        data = mmcv.load(ann_file, file_format='pkl')\n        data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))\n        data_infos = data_infos[::self.load_interval]\n        self.metadata = data['metadata']\n        self.version = self.metadata['version']\n        return data_infos\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. It includes the following keys:\n\n                - sample_idx (str): sample index\n                - pts_filename (str): filename of point clouds\n                - sweeps (list[dict]): infos of sweeps\n                - timestamp (float): sample timestamp\n                - img_filename (str, optional): image filename\n                - lidar2img (list[np.ndarray], optional): transformations\n                    from lidar to different cameras\n                - ann_info (dict): annotation info\n        \"\"\"\n        info = self.data_infos[index]\n\n        # standard protocol modified from SECOND.Pytorch\n        input_dict = dict(\n            sample_idx=info['token'],\n            pts_filename=info['lidar_path'],\n            sweeps=info['sweeps'],\n            timestamp=info['timestamp'] / 1e6,\n        )\n\n        if self.modality['use_camera']:\n            image_paths = []\n            lidar2img_rts = []\n            for cam_type, cam_info in info['cams'].items():\n                image_paths.append(cam_info['data_path'])\n                # obtain lidar to image transformation matrix\n                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])\n                lidar2cam_t = cam_info[\n                    'sensor2lidar_translation'] @ lidar2cam_r.T\n                lidar2cam_rt = np.eye(4)\n                lidar2cam_rt[:3, :3] = lidar2cam_r.T\n                lidar2cam_rt[3, :3] = -lidar2cam_t\n                intrinsic = cam_info['cam_intrinsic']\n                viewpad = np.eye(4)\n                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\n                lidar2img_rt = (viewpad @ lidar2cam_rt.T)\n                lidar2img_rts.append(lidar2img_rt)\n\n            input_dict.update(\n                dict(\n                    img_filename=image_paths,\n                    lidar2img=lidar2img_rts,\n                ))\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get 
annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):\n                    3D ground truth bboxes.\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n        \"\"\"\n        info = self.data_infos[index]\n        gt_bboxes_3d = info['gt_boxes']\n        gt_names_3d = info['gt_names']\n        gt_labels_3d = []\n        for cat in gt_names_3d:\n            if cat in self.CLASSES:\n                gt_labels_3d.append(self.CLASSES.index(cat))\n            else:\n                gt_labels_3d.append(-1)\n        gt_labels_3d = np.array(gt_labels_3d)\n\n        if 'gt_shape' in info:\n            gt_shape = info['gt_shape']\n            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_shape], axis=-1)\n\n        # the lyft box center is [0.5, 0.5, 0.5], we change it to be\n        # the same as KITTI (0.5, 0.5, 0)\n        gt_bboxes_3d = LiDARInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n        )\n        return anns_results\n\n    def _format_bbox(self, results, jsonfile_prefix=None):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. 
Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n        lyft_annos = {}\n        mapped_class_names = self.CLASSES\n\n        print('Start to convert detection format...')\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n            annos = []\n            boxes = output_to_lyft_box(det)\n            sample_token = self.data_infos[sample_id]['token']\n            boxes = lidar_lyft_box_to_global(self.data_infos[sample_id], boxes)\n            for i, box in enumerate(boxes):\n                name = mapped_class_names[box.label]\n                lyft_anno = dict(\n                    sample_token=sample_token,\n                    translation=box.center.tolist(),\n                    size=box.wlh.tolist(),\n                    rotation=box.orientation.elements.tolist(),\n                    name=name,\n                    score=box.score)\n                annos.append(lyft_anno)\n            lyft_annos[sample_token] = annos\n        lyft_submissions = {\n            'meta': self.modality,\n            'results': lyft_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'results_lyft.json')\n        print('Results writes to', res_path)\n        mmcv.dump(lyft_submissions, res_path)\n        return res_path\n\n    def _evaluate_single(self,\n                         result_path,\n                         logger=None,\n                         metric='bbox',\n                         result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in Lyft protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            metric (str, optional): Metric name used for evaluation.\n                Default: 'bbox'.\n            result_name (str, optional): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n        lyft = Lyft(\n            data_path=osp.join(self.data_root, self.version),\n            json_path=osp.join(self.data_root, self.version, self.version),\n            verbose=True)\n        eval_set_map = {\n            'v1.01-train': 'val',\n        }\n        metrics = lyft_eval(lyft, self.data_root, result_path,\n                            eval_set_map[self.version], output_dir, logger)\n\n        # record metrics\n        detail = dict()\n        metric_prefix = f'{result_name}_Lyft'\n\n        for i, name in enumerate(metrics['class_names']):\n            AP = float(metrics['mAPs_cate'][i])\n            detail[f'{metric_prefix}/{name}_AP'] = AP\n\n        detail[f'{metric_prefix}/mAP'] = metrics['Final mAP']\n        return detail\n\n    def format_results(self, results, jsonfile_prefix=None, csv_savepath=None):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n            csv_savepath (str): The path for saving csv files.\n                It includes the file path and the csv filename,\n                e.g., \"a/b/filename.csv\". If not specified,\n                the result will not be converted to csv file.\n\n        Returns:\n            tuple: Returns (result_files, tmp_dir), where `result_files` is a\n                dict containing the json filepaths, `tmp_dir` is the temporal\n                directory created for saving json files when\n                `jsonfile_prefix` is not specified.\n        \"\"\"\n        assert isinstance(results, list), 'results must be a list'\n        assert len(results) == len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        # currently the output prediction results could be in two formats\n        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)\n        # 2. list of dict('pts_bbox' or 'img_bbox':\n        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))\n        # this is a workaround to enable evaluation of both formats on Lyft\n        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449\n        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):\n            result_files = self._format_bbox(results, jsonfile_prefix)\n        else:\n            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict\n            result_files = dict()\n            for name in results[0]:\n                print(f'\\nFormating bboxes of {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_files.update(\n                    {name: self._format_bbox(results_, tmp_file_)})\n        if csv_savepath is not None:\n            self.json2csv(result_files['pts_bbox'], csv_savepath)\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric='bbox',\n                 logger=None,\n                 jsonfile_prefix=None,\n                 csv_savepath=None,\n                 result_names=['pts_bbox'],\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in Lyft protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Default: 'bbox'.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str, optional): The prefix of json files including\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            csv_savepath (str, optional): The path for saving csv files.\n                It includes the file path and the csv filename,\n                e.g., \"a/b/filename.csv\". If not specified,\n                the result will not be converted to csv file.\n            result_names (list[str], optional): Result names in the\n                metric prefix. 
Default: ['pts_bbox'].\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Evaluation results.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, jsonfile_prefix,\n                                                    csv_savepath)\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:\n                print(f'Evaluating bboxes of {name}')\n                ret_dict = self._evaluate_single(result_files[name])\n            results_dict.update(ret_dict)\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_single(result_files)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show or out_dir:\n            self.show(results, out_dir, show=show, pipeline=pipeline)\n        return results_dict\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='LoadPointsFromMultiSweeps',\n                sweeps_num=10,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ]\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=False, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Whether to visualize the results online.\n                Default: False.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            if 'pts_bbox' in result.keys():\n                result = result['pts_bbox']\n            data_info = self.data_infos[i]\n            pts_path = data_info['lidar_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points = self._extract_data(i, pipeline, 'points').numpy()\n            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                               Coord3DMode.DEPTH)\n            inds = result['scores_3d'] > 0.1\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()\n            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,\n                                               Box3DMode.DEPTH)\n            pred_bboxes = result['boxes_3d'][inds].tensor.numpy()\n            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,\n                                                 Box3DMode.DEPTH)\n            show_result(points, show_gt_bboxes, show_pred_bboxes, 
out_dir,\n                        file_name, show)\n\n    def json2csv(self, json_path, csv_savepath):\n        \"\"\"Convert the json file to csv format for submission.\n\n        Args:\n            json_path (str): Path of the result json file.\n            csv_savepath (str): Path to save the csv file.\n        \"\"\"\n        results = mmcv.load(json_path)['results']\n        sample_list_path = osp.join(self.data_root, 'sample_submission.csv')\n        data = pd.read_csv(sample_list_path)\n        Id_list = list(data['Id'])\n        pred_list = list(data['PredictionString'])\n        cnt = 0\n        print('Converting the json to csv...')\n        for token in results.keys():\n            cnt += 1\n            predictions = results[token]\n            prediction_str = ''\n            for i in range(len(predictions)):\n                prediction_str += \\\n                    str(predictions[i]['score']) + ' ' + \\\n                    str(predictions[i]['translation'][0]) + ' ' + \\\n                    str(predictions[i]['translation'][1]) + ' ' + \\\n                    str(predictions[i]['translation'][2]) + ' ' + \\\n                    str(predictions[i]['size'][0]) + ' ' + \\\n                    str(predictions[i]['size'][1]) + ' ' + \\\n                    str(predictions[i]['size'][2]) + ' ' + \\\n                    str(Quaternion(list(predictions[i]['rotation']))\n                        .yaw_pitch_roll[0]) + ' ' + \\\n                    predictions[i]['name'] + ' '\n            prediction_str = prediction_str[:-1]\n            idx = Id_list.index(token)\n            pred_list[idx] = prediction_str\n        df = pd.DataFrame({'Id': Id_list, 'PredictionString': pred_list})\n        mmcv.mkdir_or_exist(os.path.dirname(csv_savepath))\n        df.to_csv(csv_savepath, index=False)\n\n\ndef output_to_lyft_box(detection):\n    \"\"\"Convert the output to the box class in the Lyft.\n\n    Args:\n        detection (dict): Detection results.\n\n    Returns:\n        list[:obj:`LyftBox`]: List of standard LyftBoxes.\n    \"\"\"\n    box3d = detection['boxes_3d']\n    scores = detection['scores_3d'].numpy()\n    labels = detection['labels_3d'].numpy()\n\n    box_gravity_center = box3d.gravity_center.numpy()\n    box_dims = box3d.dims.numpy()\n    box_yaw = box3d.yaw.numpy()\n\n    # our LiDAR coordinate system -> Lyft box coordinate system\n    lyft_box_dims = box_dims[:, [1, 0, 2]]\n\n    box_list = []\n    for i in range(len(box3d)):\n        quat = Quaternion(axis=[0, 0, 1], radians=box_yaw[i])\n        box = LyftBox(\n            box_gravity_center[i],\n            lyft_box_dims[i],\n            quat,\n            label=labels[i],\n            score=scores[i])\n        box_list.append(box)\n    return box_list\n\n\ndef lidar_lyft_box_to_global(info, boxes):\n    \"\"\"Convert the box from ego to global coordinate.\n\n    Args:\n        info (dict): Info for a specific sample data, including the\n            calibration information.\n        boxes (list[:obj:`LyftBox`]): List of predicted LyftBoxes.\n\n    Returns:\n        list: List of standard LyftBoxes in the global\n            coordinate.\n    \"\"\"\n    box_list = []\n    for box in boxes:\n        # Move box to ego vehicle coord system\n        box.rotate(Quaternion(info['lidar2ego_rotation']))\n        box.translate(np.array(info['lidar2ego_translation']))\n        # Move box to global coord system\n        box.rotate(Quaternion(info['ego2global_rotation']))\n        
box.translate(np.array(info['ego2global_translation']))\n        box_list.append(box)\n    return box_list\n"
  },
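  {
    "path": "examples/lyft_box_to_global_example.py",
    "content": "# Hypothetical usage sketch, not part of the original repository: it mirrors the\n# coordinate chain applied by lidar_lyft_box_to_global() (lidar -> ego -> global)\n# on a single box centre instead of a LyftBox object. The info keys follow the\n# dataset info dict; the numeric values and this file path are made up.\nimport numpy as np\nfrom pyquaternion import Quaternion\n\ninfo = {\n    'lidar2ego_rotation': [1.0, 0.0, 0.0, 0.0],  # identity quaternion (w, x, y, z)\n    'lidar2ego_translation': [0.9, 0.0, 1.8],\n    'ego2global_rotation': Quaternion(axis=[0, 0, 1], radians=np.pi / 2).elements.tolist(),\n    'ego2global_translation': [100.0, 50.0, 0.0],\n}\n\ncenter = np.array([10.0, 0.0, 0.0])  # box centre in the lidar frame\n# lidar -> ego: rotate, then translate\ncenter = Quaternion(info['lidar2ego_rotation']).rotate(center) + np.array(info['lidar2ego_translation'])\n# ego -> global: rotate, then translate\ncenter = Quaternion(info['ego2global_rotation']).rotate(center) + np.array(info['ego2global_translation'])\nprint(center)  # approximately [100.0, 60.9, 1.8]\n"
  },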
  {
    "path": "mmdet3d/datasets/map_utils/mean_ap.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom multiprocessing import Pool\nfrom shapely.geometry import LineString, Polygon\nimport mmcv\nimport numpy as np\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\nimport json\nfrom os import path as osp\nimport os\nfrom functools import partial\nfrom .tpfp import tpfp_gen, custom_tpfp_gen\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision (for single or multiple scales).\n\n    Args:\n        recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )\n        precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float or ndarray: calculated average precision\n    \"\"\"\n    no_scale = False\n    if recalls.ndim == 1:\n        no_scale = True\n        recalls = recalls[np.newaxis, :]\n        precisions = precisions[np.newaxis, :]\n    assert recalls.shape == precisions.shape and recalls.ndim == 2\n    num_scales = recalls.shape[0]\n    ap = np.zeros(num_scales, dtype=np.float32)\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        for i in range(num_scales):\n            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]\n            ap[i] = np.sum(\n                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])\n    elif mode == '11points':\n        for i in range(num_scales):\n            for thr in np.arange(0, 1 + 1e-3, 0.1):\n                precs = precisions[i, recalls[i, :] >= thr]\n                prec = precs.max() if precs.size > 0 else 0\n                ap[i] += prec\n        ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    if no_scale:\n        ap = ap[0]\n    return ap\n\ndef get_cls_results(gen_results, \n                    annotations, \n                    num_sample=100, \n                    num_pred_pts_per_instance=30,\n                    eval_use_same_gt_sample_num_flag=False,\n                    class_id=0, \n                    fix_interval=False):\n    \"\"\"Get det results and gt information of a certain class.\n\n    Args:\n        gen_results (list[list]): Same as `eval_map()`.\n        annotations (list[dict]): Same as `eval_map()`.\n        class_id (int): ID of a specific class.\n\n    Returns:\n        tuple[list[np.ndarray]]: detected bboxes, gt bboxes\n    \"\"\"\n    # if len(gen_results) == 0 or \n\n    cls_gens, cls_scores = [], []\n    for res in gen_results['vectors']:\n        if res['type'] == class_id:\n            if len(res['pts']) < 2:\n                continue\n            if not eval_use_same_gt_sample_num_flag:\n                sampled_points = np.array(res['pts'])\n            else:\n                line = res['pts']\n                line = LineString(line)\n\n                if fix_interval:\n                    distances = list(np.arange(1., line.length, 1.))\n                    distances = [0,] + distances + [line.length,]\n           
         sampled_points = np.array([list(line.interpolate(distance).coords)\n                                            for distance in distances]).reshape(-1, 2)\n                else:\n                    distances = np.linspace(0, line.length, num_sample)\n                    sampled_points = np.array([list(line.interpolate(distance).coords)\n                                                for distance in distances]).reshape(-1, 2)\n                \n            cls_gens.append(sampled_points)\n            cls_scores.append(res['confidence_level'])\n    num_res = len(cls_gens)\n    if num_res > 0:\n        cls_gens = np.stack(cls_gens).reshape(num_res,-1)\n        cls_scores = np.array(cls_scores)[:,np.newaxis]\n        cls_gens = np.concatenate([cls_gens,cls_scores],axis=-1)\n        # print(f'for class {i}, cls_gens has shape {cls_gens.shape}')\n    else:\n        if not eval_use_same_gt_sample_num_flag:\n            cls_gens = np.zeros((0,num_pred_pts_per_instance*2+1))\n        else:\n            cls_gens = np.zeros((0,num_sample*2+1))\n        # print(f'for class {i}, cls_gens has shape {cls_gens.shape}')\n\n    cls_gts = []\n    for ann in annotations['vectors']:\n        if ann['type'] == class_id:\n            # line = ann['pts'] +  np.array((1,1)) # for hdmapnet\n            line = ann['pts']\n            # line = ann['pts'].cumsum(0)\n            line = LineString(line)\n            distances = np.linspace(0, line.length, num_sample)\n            sampled_points = np.array([list(line.interpolate(distance).coords)\n                                        for distance in distances]).reshape(-1, 2)\n            \n            cls_gts.append(sampled_points)\n    num_gts = len(cls_gts)\n    if num_gts > 0:\n        cls_gts = np.stack(cls_gts).reshape(num_gts,-1)\n    else:\n        cls_gts = np.zeros((0,num_sample*2))\n    return cls_gens, cls_gts\n    # ones = np.ones((num_gts,1))\n    # tmp_cls_gens = np.concatenate([cls_gts,ones],axis=-1)\n    # return tmp_cls_gens, cls_gts\n\ndef format_res_gt_by_classes(result_path,\n                             gen_results,\n                             annotations,\n                             cls_names=None,\n                             num_pred_pts_per_instance=30,\n                             eval_use_same_gt_sample_num_flag=False,\n                             pc_range=[-15.0, -30.0, -5.0, 15.0, 30.0, 3.0],\n                             nproc=24):\n    assert cls_names is not None\n    timer = mmcv.Timer()\n    num_fixed_sample_pts = 100\n    fix_interval = False\n    print('results path: {}'.format(result_path))\n\n    output_dir = osp.join(*osp.split(result_path)[:-1])\n    assert len(gen_results) == len(annotations)\n    \n\n    gen_results = [gen_results[each['sample_token']] for each in annotations]\n    pool = Pool(nproc)\n    cls_gens, cls_gts = {}, {}\n    print('Formatting ...')\n    formatting_file = 'cls_formatted.pkl'\n    formatting_file = osp.join(output_dir,formatting_file)\n\n    # for vis\n    if False:\n        from PIL import Image\n        import matplotlib.pyplot as plt\n        from matplotlib import transforms\n        from matplotlib.patches import Rectangle\n\n        show_dir = osp.join(output_dir,'vis_json')\n        mmcv.mkdir_or_exist(osp.abspath(show_dir))\n        # import pdb;pdb.set_trace()\n        car_img = Image.open('./figs/lidar_car.png')\n        colors_plt = ['r', 'b', 'g']\n        for i in range(20):\n\n            plt.figure(figsize=(2, 4))\n            plt.xlim(pc_range[0], pc_range[3])\n     
       plt.ylim(pc_range[1], pc_range[4])\n            plt.axis('off')\n\n            for line in gen_results[i]['vectors']:\n                l = np.array(line['pts'])\n                plt.plot(l[:,0],l[:,1],'-', \n                # color=colors[line['type']]\n                color = 'red',\n                )\n\n            for line in annotations[i]['vectors']:\n                # l = np.array(line['pts']) + np.array((1,1))\n                l = np.array(line['pts'])\n                # l = line['pts']\n                plt.plot(l[:,0],l[:,1],'-', \n                    # color=colors[line['type']],\n                    color = 'blue',\n                    )\n            plt.imshow(car_img, extent=[-1.2, 1.2, -1.5, 1.5])\n            map_path = osp.join(show_dir, 'COMPARE_MAP_{}.jpg'.format(i))\n            plt.savefig(map_path, bbox_inches='tight', dpi=400)\n            plt.close()\n\n    for i, clsname in enumerate(cls_names):\n\n        gengts = pool.starmap(\n                    partial(get_cls_results, num_sample=num_fixed_sample_pts,\n                        num_pred_pts_per_instance=num_pred_pts_per_instance,\n                        eval_use_same_gt_sample_num_flag=eval_use_same_gt_sample_num_flag,class_id=i,fix_interval=fix_interval),\n                    zip(gen_results, annotations))   \n        # gengts = map(partial(get_cls_results, num_sample=num_fixed_sample_pts, class_id=i,fix_interval=fix_interval),\n        #             zip(gen_results, annotations))\n        # import pdb;pdb.set_trace()\n        gens, gts = tuple(zip(*gengts))\n        cls_gens[clsname] = gens\n        cls_gts[clsname] = gts\n    \n    mmcv.dump([cls_gens, cls_gts],formatting_file)\n    print('Cls data formatting done in {:2f}s!! with {}'.format(float(timer.since_start()),formatting_file))\n    pool.close()\n    return cls_gens, cls_gts\n\ndef eval_map(gen_results,\n             annotations,\n             cls_gens,\n             cls_gts,\n             threshold=0.5,\n             cls_names=None,\n             logger=None,\n             tpfp_fn=None,\n             pc_range=[-15.0, -30.0, -5.0, 15.0, 30.0, 3.0],\n             metric=None,\n             num_pred_pts_per_instance=30,\n             nproc=24):\n    timer = mmcv.Timer()\n    pool = Pool(nproc)\n\n    eval_results = []\n    \n    for i, clsname in enumerate(cls_names):\n        \n        # get gt and det bboxes of this class\n        cls_gen = cls_gens[clsname]\n        cls_gt = cls_gts[clsname]\n        # choose proper function according to datasets to compute tp and fp\n        # XXX\n        # func_name = cls2func[clsname]\n        # tpfp_fn = tpfp_fn_dict[tpfp_fn_name]\n        tpfp_fn = custom_tpfp_gen\n        # Trick for serialized\n        # only top-level function can be serized\n        # somehow use partitial the return function is defined\n        # at the top level.\n\n        # tpfp = tpfp_fn(cls_gen[i], cls_gt[i],threshold=threshold, metric=metric)\n        # import pdb; pdb.set_trace()\n        # TODO this is a hack\n        tpfp_fn = partial(tpfp_fn, threshold=threshold, metric=metric)\n        args = []\n        # compute tp and fp for each image with multiple processes\n        tpfp = pool.starmap(\n            tpfp_fn,\n            zip(cls_gen, cls_gt, *args))\n        # import pdb;pdb.set_trace()\n        tp, fp = tuple(zip(*tpfp))\n\n\n\n        # map_results = map(\n        #     tpfp_fn,\n        #     cls_gen, cls_gt)\n        # tp, fp = tuple(map(list, zip(*map_results)))\n\n\n        # debug and testing\n        # for i 
in range(len(cls_gen)):\n        #     # print(i)\n        #     tpfp = tpfp_fn(cls_gen[i], cls_gt[i],threshold=threshold)\n        #     print(i)\n        #     tpfp = (tpfp,)\n        #     print(tpfp)\n        # i = 0 \n        # tpfp = tpfp_fn(cls_gen[i], cls_gt[i],threshold=threshold)\n        # import pdb; pdb.set_trace()\n\n        # XXX\n        \n        num_gts = 0\n        for j, bbox in enumerate(cls_gt):\n            num_gts += bbox.shape[0]\n\n        # sort all det bboxes by score, also sort tp and fp\n        # import pdb;pdb.set_trace()\n        cls_gen = np.vstack(cls_gen)\n        num_dets = cls_gen.shape[0]\n        sort_inds = np.argsort(-cls_gen[:, -1]) #descending, high score front\n        tp = np.hstack(tp)[sort_inds]\n        fp = np.hstack(fp)[sort_inds]\n        \n        # calculate recall and precision with tp and fp\n        # num_det*num_res\n        tp = np.cumsum(tp, axis=0)\n        fp = np.cumsum(fp, axis=0)\n        eps = np.finfo(np.float32).eps\n        recalls = tp / np.maximum(num_gts, eps)\n        precisions = tp / np.maximum((tp + fp), eps)\n\n        # calculate AP\n        # if dataset != 'voc07' else '11points'\n        mode = 'area'\n        ap = average_precision(recalls, precisions, mode)\n        eval_results.append({\n            'num_gts': num_gts,\n            'num_dets': num_dets,\n            'recall': recalls,\n            'precision': precisions,\n            'ap': ap\n        })\n        print('cls:{} done in {:2f}s!!'.format(clsname,float(timer.since_last_check())))\n    pool.close()\n    aps = []\n    for cls_result in eval_results:\n        if cls_result['num_gts'] > 0:\n            aps.append(cls_result['ap'])\n    mean_ap = np.array(aps).mean().item() if len(aps) else 0.0\n\n    print_map_summary(\n        mean_ap, eval_results, class_name=cls_names, logger=logger)\n\n    return mean_ap, eval_results\n\n\n\ndef print_map_summary(mean_ap,\n                      results,\n                      class_name=None,\n                      scale_ranges=None,\n                      logger=None):\n    \"\"\"Print mAP and results of each class.\n\n    A table will be printed to show the gts/dets/recall/AP of each class and\n    the mAP.\n\n    Args:\n        mean_ap (float): Calculated from `eval_map()`.\n        results (list[dict]): Calculated from `eval_map()`.\n        dataset (list[str] | str | None): Dataset name or dataset classes.\n        scale_ranges (list[tuple] | None): Range of scales to be evaluated.\n        logger (logging.Logger | str | None): The way to print the mAP\n            summary. See `mmcv.utils.print_log()` for details. 
Default: None.\n    \"\"\"\n\n    if logger == 'silent':\n        return\n\n    if isinstance(results[0]['ap'], np.ndarray):\n        num_scales = len(results[0]['ap'])\n    else:\n        num_scales = 1\n\n    if scale_ranges is not None:\n        assert len(scale_ranges) == num_scales\n\n    num_classes = len(results)\n\n    recalls = np.zeros((num_scales, num_classes), dtype=np.float32)\n    aps = np.zeros((num_scales, num_classes), dtype=np.float32)\n    num_gts = np.zeros((num_scales, num_classes), dtype=int)\n    for i, cls_result in enumerate(results):\n        if cls_result['recall'].size > 0:\n            recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1]\n        aps[:, i] = cls_result['ap']\n        num_gts[:, i] = cls_result['num_gts']\n\n    label_names = class_name\n\n    if not isinstance(mean_ap, list):\n        mean_ap = [mean_ap]\n\n    header = ['class', 'gts', 'dets', 'recall', 'ap']\n    for i in range(num_scales):\n        if scale_ranges is not None:\n            print_log(f'Scale range {scale_ranges[i]}', logger=logger)\n        table_data = [header]\n        for j in range(num_classes):\n            row_data = [\n                label_names[j], num_gts[i, j], results[j]['num_dets'],\n                f'{recalls[i, j]:.3f}', f'{aps[i, j]:.3f}'\n            ]\n            table_data.append(row_data)\n        table_data.append(['mAP', '', '', '', f'{mean_ap[i]:.3f}'])\n        table = AsciiTable(table_data)\n        table.inner_footing_row_border = True\n        print_log('\\n' + table.table, logger=logger)\n"
  },
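  {
    "path": "examples/map_average_precision_example.py",
    "content": "# Hypothetical usage sketch, not part of the original repository: shows how\n# average_precision() from mmdet3d/datasets/map_utils/mean_ap.py consumes a\n# monotonically increasing recall array and the matching precision array.\n# Assumes the mmdet3d package and its dependencies are importable; the toy\n# precision/recall values and this file path are made up.\nimport numpy as np\nfrom mmdet3d.datasets.map_utils.mean_ap import average_precision\n\n# toy precision-recall curve for a single class with four ranked detections\nrecalls = np.array([0.25, 0.5, 0.75, 1.0], dtype=np.float32)\nprecisions = np.array([1.0, 0.67, 0.5, 0.4], dtype=np.float32)\n\n# 'area' integrates the precision-recall curve,\n# '11points' averages precision at recalls 0, 0.1, ..., 1\nap_area = average_precision(recalls, precisions, mode='area')\nap_11pt = average_precision(recalls, precisions, mode='11points')\nprint('AP (area): {:.3f}, AP (11points): {:.3f}'.format(ap_area, ap_11pt))\n"
  },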
  {
    "path": "mmdet3d/datasets/map_utils/tpfp.py",
    "content": "import mmcv\nimport numpy as np\n\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\nfrom .tpfp_chamfer import vec_iou, convex_iou, rbbox_iou, polyline_score, custom_polyline_score\nfrom shapely.geometry import LineString, Polygon\n# from vecmapnet_ops.ops.iou import convex_iou\n\ndef tpfp_bbox(det_bboxes,\n              gt_bboxes,\n              gt_bbox_masks,\n              threshold=0.5):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of\n        each array is (num_scales, m).\n    \"\"\"\n\n    num_dets = len(det_bboxes)\n    num_gts = len(gt_bboxes)\n\n    # tp and fp\n    tp = np.zeros((num_dets), dtype=np.float32)\n    fp = np.zeros((num_dets), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    # XXX\n    if num_gts == 0:\n        fp[...] = 1\n        return tp, fp\n    \n    if num_dets == 0:\n        return tp, fp\n    \n    # # distance matrix: n x m\n    bbox_p = det_bboxes[:, :-1].reshape(num_dets,-1,2)\n    bbox_g = gt_bboxes.reshape(num_gts,-1,2)\n    bbox_gm = gt_bbox_masks.reshape(num_gts,-1,2)\n    matrix = convex_iou(bbox_p,bbox_g,bbox_gm)\n\n    # for each det, the max iou with all gts\n    matrix_max = matrix.max(axis=1)\n    # for each det, which gt overlaps most with it\n    matrix_argmax = matrix.argmax(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n\n    gt_covered = np.zeros(num_gts, dtype=bool)\n\n    # tp = 0 and fp = 0 means ignore this detected bbox,\n    for i in sort_inds:\n        if matrix_max[i] >= threshold:\n            matched_gt = matrix_argmax[i]\n            if not gt_covered[matched_gt]:\n                gt_covered[matched_gt] = True\n                tp[i] = 1\n            else:\n                fp[i] = 1\n        else:\n            fp[i] = 1\n\n    return tp, fp\n\n\ndef tpfp_rbbox(det_bboxes,\n              gt_bboxes,\n              gt_bbox_masks,\n              threshold=0.5):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. 
The shape of\n        each array is (num_scales, m).\n    \"\"\"\n\n    num_dets = len(det_bboxes)\n    num_gts = len(gt_bboxes)\n\n    # tp and fp\n    tp = np.zeros((num_dets), dtype=np.float32)\n    fp = np.zeros((num_dets), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    # XXX\n    if num_gts == 0:\n        fp[...] = 1\n        return tp, fp\n    \n    if num_dets == 0:\n        return tp, fp\n    \n    # # distance matrix: n x m\n    bbox_p = det_bboxes[:, :-1].reshape(num_dets,-1,2)\n    bbox_g = gt_bboxes.reshape(num_gts,-1,2)\n    bbox_gm = gt_bbox_masks.reshape(num_gts,-1,2)\n    matrix = rbbox_iou(bbox_p,bbox_g,bbox_gm)\n\n    # for each det, the max iou with all gts\n    matrix_max = matrix.max(axis=1)\n    # for each det, which gt overlaps most with it\n    matrix_argmax = matrix.argmax(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n\n    gt_covered = np.zeros(num_gts, dtype=bool)\n\n    # tp = 0 and fp = 0 means ignore this detected bbox,\n    for i in sort_inds:\n        if matrix_max[i] >= threshold:\n            matched_gt = matrix_argmax[i]\n            if not gt_covered[matched_gt]:\n                gt_covered[matched_gt] = True\n                tp[i] = 1\n            else:\n                fp[i] = 1\n        else:\n            fp[i] = 1\n\n    return tp, fp\n\n\ndef tpfp_det(det_bboxes,\n             gt_bboxes,\n             threshold=0.5):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of\n        each array is (num_scales, m).\n    \"\"\"\n\n    num_dets = det_bboxes.shape[0]\n    num_gts = gt_bboxes.shape[0]\n\n    # tp and fp\n    tp = np.zeros((num_dets), dtype=np.float32)\n    fp = np.zeros((num_dets), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    # XXX\n    if num_gts == 0:\n        fp[...] 
= 1\n        return tp, fp\n    \n    if num_dets == 0:\n        return tp, fp\n    \n    # # distance matrix: n x m\n    matrix = vec_iou(\n            det_bboxes[:, :-1].reshape(num_dets,-1,2), \n            gt_bboxes.reshape(num_gts,-1,2))\n    # for each det, the max iou with all gts\n    matrix_max = matrix.max(axis=1)\n    # for each det, which gt overlaps most with it\n    matrix_argmax = matrix.argmax(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n\n    gt_covered = np.zeros(num_gts, dtype=bool)\n\n    # tp = 0 and fp = 0 means ignore this detected bbox,\n    for i in sort_inds:\n        if matrix_max[i] >= threshold:\n            matched_gt = matrix_argmax[i]\n            if not gt_covered[matched_gt]:\n                gt_covered[matched_gt] = True\n                tp[i] = 1\n            else:\n                fp[i] = 1\n        else:\n            fp[i] = 1\n\n    return tp, fp\n\n\ndef tpfp_gen(gen_lines,\n             gt_lines,\n             threshold=0.5,\n             metric='POR'):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of\n        each array is (num_scales, m).\n    \"\"\"\n\n    num_gens = gen_lines.shape[0]\n    num_gts = gt_lines.shape[0]\n    \n    # tp and fp\n    tp = np.zeros((num_gens), dtype=np.float32)\n    fp = np.zeros((num_gens), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    if num_gts == 0:\n        fp[...] 
= 1\n        return tp, fp\n    \n    if num_gens == 0:\n        return tp, fp\n    \n    gen_scores = gen_lines[:,-1] # n\n    # distance matrix: n x m\n\n    # matrix = custom_polyline_score(\n    #         gen_lines[:,:-1].reshape(num_gens,-1,2), \n    #         gt_lines.reshape(num_gts,-1,2),linewidth=2.,metric=metric)\n\n    # TODO MAY bug here\n    matrix = polyline_score(\n            gen_lines[:,:-1].reshape(num_gens,-1,2), \n            gt_lines.reshape(num_gts,-1,2),linewidth=2.,metric=metric)\n    # for each det, the max iou with all gts\n    matrix_max = matrix.max(axis=1)\n    # for each det, which gt overlaps most with it\n    matrix_argmax = matrix.argmax(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-gen_scores)\n\n    gt_covered = np.zeros(num_gts, dtype=bool)\n\n    # tp = 0 and fp = 0 means ignore this detected bbox,\n    for i in sort_inds:\n        if matrix_max[i] >= threshold:\n            matched_gt = matrix_argmax[i]\n            if not gt_covered[matched_gt]:\n                gt_covered[matched_gt] = True\n                tp[i] = 1\n            else:\n                fp[i] = 1\n        else:\n            fp[i] = 1\n\n    return tp, fp\n\n\ndef custom_tpfp_gen(gen_lines,\n             gt_lines,\n             threshold=0.5,\n             metric='chamfer'):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of\n        each array is (num_scales, m).\n    \"\"\"\n    if metric == 'chamfer':\n        if threshold >0:\n            threshold= -threshold\n    # else:\n    #     raise NotImplementedError\n\n    # import pdb;pdb.set_trace()\n    num_gens = gen_lines.shape[0]\n    num_gts = gt_lines.shape[0]\n    \n    # tp and fp\n    tp = np.zeros((num_gens), dtype=np.float32)\n    fp = np.zeros((num_gens), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    if num_gts == 0:\n        fp[...] 
= 1\n        return tp, fp\n    \n    if num_gens == 0:\n        return tp, fp\n    \n    gen_scores = gen_lines[:,-1] # confidence score of each prediction\n    # score matrix: num_gens x num_gts\n\n    matrix = custom_polyline_score(\n            gen_lines[:,:-1].reshape(num_gens,-1,2), \n            gt_lines.reshape(num_gts,-1,2),linewidth=2.,metric=metric)\n    # for each prediction, the best score over all gts\n    matrix_max = matrix.max(axis=1)\n    # for each prediction, the gt it matches best\n    matrix_argmax = matrix.argmax(axis=1)\n    # sort all predictions in descending order by confidence\n    sort_inds = np.argsort(-gen_scores)\n\n    gt_covered = np.zeros(num_gts, dtype=bool)\n\n    # tp = 0 and fp = 0 means this prediction is ignored\n    for i in sort_inds:\n        if matrix_max[i] >= threshold:\n            matched_gt = matrix_argmax[i]\n            if not gt_covered[matched_gt]:\n                gt_covered[matched_gt] = True\n                tp[i] = 1\n            else:\n                fp[i] = 1\n        else:\n            fp[i] = 1\n\n    return tp, fp\n\n"
  },
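  {
    "path": "examples/chamfer_tpfp_example.py",
    "content": "# Hypothetical sketch, not part of the original repository: illustrates the\n# symmetric chamfer score that custom_polyline_score() assigns to a matched\n# (prediction, ground-truth) polyline pair, and why custom_tpfp_gen() flips a\n# positive threshold to a negative one for the 'chamfer' metric. Note that\n# custom_polyline_score() only evaluates pairs whose buffered polylines\n# intersect; the points and this file path are made up.\nimport numpy as np\nfrom scipy.spatial import distance\n\npred = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])  # predicted polyline points\ngt = np.array([[0.0, 0.5], [1.0, 0.5], [2.0, 0.5]])    # ground-truth polyline points\n\ndist_mat = distance.cdist(pred, gt, 'euclidean')\n# mean nearest-neighbour distance in both directions, negated so that larger is\n# better; with threshold=0.5 the matching test in custom_tpfp_gen() becomes\n# score >= -0.5\nscore = -(dist_mat.min(-1).mean() + dist_mat.min(-2).mean()) / 2\nprint(score)  # -0.5 for these points, i.e. exactly on the matching threshold\n"
  },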
  {
    "path": "mmdet3d/datasets/map_utils/tpfp_chamfer.py",
    "content": "# from ..chamfer_dist import ChamferDistance\nimport numpy as np\nfrom shapely.geometry import LineString, Polygon\nfrom shapely.strtree import STRtree\nfrom shapely.geometry import CAP_STYLE, JOIN_STYLE\nfrom scipy.spatial import distance\nimport similaritymeasures\n\n# def chamfer_distance(pred_bbox, gt_bbox):\n\n#     cd_dist_func = ChamferDistance.vec_cd_dist(\n#         pred, pred_mask, tgt, tgt_mask)()\n\n\ndef vec_iou(pred_lines, gt_lines):\n    '''\n        each line with 1 meter width\n        pred_lines: num_preds, npts, 2\n        gt_lines: num_gts, npts, 2\n    '''\n\n    num_preds = pred_lines.shape[0]\n    num_gts = gt_lines.shape[0]\n\n    pred_lines_shapely = \\\n        [LineString(i).buffer(1.,\n            cap_style=CAP_STYLE.round, join_style=JOIN_STYLE.round)\n                          for i in pred_lines]\n    gt_lines_shapely =\\\n        [LineString(i).buffer(1.,\n            cap_style=CAP_STYLE.round, join_style=JOIN_STYLE.round)\n                        for i in gt_lines]\n\n    # construct tree\n    tree = STRtree(gt_lines_shapely)\n    index_by_id = dict((id(pt), i) for i, pt in enumerate(gt_lines_shapely))\n\n    iou_matrix = np.zeros((num_preds, num_gts))\n\n    for i, pline in enumerate(pred_lines_shapely):\n\n        for o in tree.query(pline):\n            if o.intersects(pline):\n                gt_id = index_by_id[id(o)]\n\n                inter = o.intersection(pline).area\n                union = o.union(pline).area\n                iou_matrix[i, gt_id] = inter / union\n\n    return iou_matrix\n\ndef convex_iou(pred_lines, gt_lines, gt_mask):\n    '''\n        each line with 1 meter width\n        pred_lines: num_preds, List [npts, 2]\n        gt_lines: num_gts, npts, 2\n        gt_mask: num_gts, npts, 2\n    '''\n\n    num_preds = len(pred_lines)\n    num_gts = len(gt_lines)\n\n    pred_lines_shapely = \\\n        [Polygon(i).convex_hull for i in pred_lines]\n    gt_lines_shapely =\\\n        [Polygon(i[m].reshape(-1,2)).convex_hull for i,m in zip(gt_lines,gt_mask)]\n\n    # construct tree\n    tree = STRtree(pred_lines_shapely)\n    index_by_id = dict((id(pt), i) for i, pt in enumerate(pred_lines_shapely))\n\n    iou_matrix = np.zeros((num_preds, num_gts))\n\n    for i, pline in enumerate(gt_lines_shapely):\n\n        for o in tree.query(pline):\n            if o.intersects(pline):\n                pred_id = index_by_id[id(o)]\n\n                inter = o.intersection(pline).area\n                union = o.union(pline).area\n                iou_matrix[pred_id, i] = inter / union\n\n    return iou_matrix\n\ndef rbbox_iou(pred_lines, gt_lines, gt_mask):\n    '''\n        each line with 1 meter width\n        pred_lines: num_preds, List [npts, 2]\n        gt_lines: num_gts, npts, 2\n        gt_mask: num_gts, npts, 2\n    '''\n\n    num_preds = len(pred_lines)\n    num_gts = len(gt_lines)\n\n    pred_lines_shapely = \\\n        [Polygon(i).minimum_rotated_rectangle for i in pred_lines]\n    gt_lines_shapely =\\\n        [Polygon(i[m].reshape(-1,2)) for i,m in zip(gt_lines,gt_mask)]\n\n    # construct tree\n    tree = STRtree(pred_lines_shapely)\n    index_by_id = dict((id(pt), i) for i, pt in enumerate(pred_lines_shapely))\n\n    iou_matrix = np.zeros((num_preds, num_gts))\n\n    for i, pline in enumerate(gt_lines_shapely):\n\n        for o in tree.query(pline):\n            if o.intersects(pline):\n                pred_id = index_by_id[id(o)]\n\n                inter = o.intersection(pline).area\n                union = 
o.union(pline).area\n                iou_matrix[pred_id, i] = inter / union\n\n    return iou_matrix\n\n\ndef polyline_score(pred_lines, gt_lines, linewidth=1., metric='POR'):\n    '''\n        each line with 1 meter width\n        pred_lines: num_preds, List [npts, 2]\n        gt_lines: num_gts, npts, 2\n        gt_mask: num_gts, npts, 2\n    '''\n    positive_threshold = 1.\n    num_preds = len(pred_lines)\n    num_gts = len(gt_lines)\n    line_length = pred_lines.shape[1]\n\n    # gt_lines = gt_lines + np.array((1.,1.))\n\n    pred_lines_shapely = \\\n        [LineString(i).buffer(linewidth,\n            cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre)\n                          for i in pred_lines]\n    gt_lines_shapely =\\\n        [LineString(i).buffer(linewidth,\n            cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre)\n                        for i in gt_lines]\n\n    # construct tree\n    tree = STRtree(pred_lines_shapely)\n    index_by_id = dict((id(pt), i) for i, pt in enumerate(pred_lines_shapely))\n\n    if metric=='POR':\n        iou_matrix = np.zeros((num_preds, num_gts),dtype=np.float64)\n    elif metric=='frechet':\n        iou_matrix = np.full((num_preds, num_gts), -100.)\n    elif metric=='chamfer':\n        iou_matrix = np.full((num_preds, num_gts), -100.)\n    elif metric=='chamfer_v2':\n        iou_matrix = np.full((num_preds, num_gts), -100.)\n\n    for i, pline in enumerate(gt_lines_shapely):\n\n        for o in tree.query(pline):\n            if o.intersects(pline):\n                pred_id = index_by_id[id(o)]\n\n                if metric=='POR':\n                    dist_mat = distance.cdist(\n                        pred_lines[pred_id], gt_lines[i], 'euclidean')\n                    \n                    valid_ab = (dist_mat.min(-1) < positive_threshold).sum()\n                    valid_ba = (dist_mat.min(-2) < positive_threshold).sum()\n\n                    iou_matrix[pred_id, i] = min(valid_ba,valid_ab) / line_length\n                    # iou_matrix[pred_id, i] = ((valid_ba+valid_ab)/2) / line_length\n                    # assert iou_matrix[pred_id, i] <= 1. 
and iou_matrix[pred_id, i] >= 0.\n                elif metric=='frechet':\n                    fdistance_1 = \\\n                        -similaritymeasures.frechet_dist(pred_lines[pred_id], gt_lines[i])\n                    fdistance_2 = \\\n                        -similaritymeasures.frechet_dist(pred_lines[pred_id][::-1], gt_lines[i])\n                    fdistance = max(fdistance_1,fdistance_2)\n                    iou_matrix[pred_id, i] = fdistance\n\n                elif metric=='chamfer':\n                    dist_mat = distance.cdist(\n                        pred_lines[pred_id], gt_lines[i], 'euclidean')\n                    \n                    valid_ab = dist_mat.min(-1).sum()\n                    valid_ba = dist_mat.min(-2).sum()\n\n                    iou_matrix[pred_id, i] = -(valid_ba+valid_ab)/(2*line_length)\n                    # if iou_matrix[pred_id, i] == 0:\n                    #     import ipdb; ipdb.set_trace()\n                elif metric=='chamfer_v2':\n                    dist_mat = distance.cdist(\n                        pred_lines[pred_id], gt_lines[i], 'euclidean')\n                    \n                    valid_ab = dist_mat.min(-1).sum()\n                    valid_ba = dist_mat.min(-2).sum()\n\n                    iou_matrix[pred_id, i] = -(valid_ba/pred_lines[pred_id].shape[0]\n                                                +valid_ab/gt_lines[i].shape[0])/2\n                    # if iou_matrix[pred_id, i] == 0:\n                    #     import ipdb; ipdb.set_trace()\n\n    \n    # if True:\n    #     import matplotlib.pyplot as plt\n    #     print('pred num', num_preds)\n    #     print('gt num', num_gts)\n    #     for i in range(num_preds):\n    #         plt.plot(pred_lines[i][:,0],pred_lines[i][:,1],'-',color='red',alpha=0.5)\n    #     for i in range(num_gts):\n    #         plt.plot(gt_lines[i][:,0],gt_lines[i][:,1],'-',color='blue',alpha=0.5)\n    #     plt.savefig('test.png')\n    #     plt.close()\n    return iou_matrix\n\n\ndef custom_polyline_score(pred_lines, gt_lines, linewidth=1., metric='chamfer'):\n    '''\n        each line with 1 meter width\n        pred_lines: num_preds, List [npts, 2]\n        gt_lines: num_gts, npts, 2\n        gt_mask: num_gts, npts, 2\n    '''\n    if metric == 'iou':\n        linewidth = 1.0\n    positive_threshold = 1.\n    num_preds = len(pred_lines)\n    num_gts = len(gt_lines)\n    line_length = pred_lines.shape[1]\n\n    # gt_lines = gt_lines + np.array((1.,1.))\n\n    pred_lines_shapely = \\\n        [LineString(i).buffer(linewidth,\n            cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre)\n                          for i in pred_lines]\n    gt_lines_shapely =\\\n        [LineString(i).buffer(linewidth,\n            cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre)\n                        for i in gt_lines]\n\n    # construct tree\n    tree = STRtree(pred_lines_shapely)\n    index_by_id = dict((id(pt), i) for i, pt in enumerate(pred_lines_shapely))\n\n\n    if metric=='chamfer':\n        iou_matrix = np.full((num_preds, num_gts), -100.)\n    elif metric=='iou':\n        iou_matrix = np.zeros((num_preds, num_gts),dtype=np.float64)\n    else:\n        raise NotImplementedError\n\n    for i, pline in enumerate(gt_lines_shapely):\n\n        for o in tree.query(pline):\n            if o.intersects(pline):\n                pred_id = index_by_id[id(o)]\n\n                if metric=='chamfer':\n                    dist_mat = distance.cdist(\n                        pred_lines[pred_id], 
gt_lines[i], 'euclidean')\n                    # import pdb;pdb.set_trace()\n                    valid_ab = dist_mat.min(-1).mean()\n                    valid_ba = dist_mat.min(-2).mean()\n\n                    iou_matrix[pred_id, i] = -(valid_ba+valid_ab)/2\n                elif metric=='iou':\n                    inter = o.intersection(pline).area\n                    union = o.union(pline).area\n                    iou_matrix[pred_id, i] = inter / union\n\n    return iou_matrix\n\nif __name__ == '__main__':\n    import torch\n\n    line1 = torch.tensor([\n        [1, 5], [3, 5], [5, 5]\n    ])\n\n    line0 = torch.tensor([\n        [3, 6], [4, 8], [5, 6]\n    ])\n\n    line2 = torch.tensor([\n        [1, 4], [3, 4], [5, 4]\n    ])\n\n    line3 = torch.tensor([\n        [4, 4], [3, 3], [5, 3]\n    ])\n\n    gt = torch.stack((line2, line3), dim=0).type(torch.float32)\n    pred = torch.stack((line0, line1), dim=0).type(torch.float32)\n\n    # import ipdb; ipdb.set_trace()\n    import mmcv\n    # with mmcv.Timer():\n    #     gt = upsampler(gt, pts=10)\n    #     pred = upsampler(pred, pts=10)\n\n    import matplotlib.pyplot as plt\n    from shapely.geometry import LineString\n    from descartes import PolygonPatch\n    \n    iou_matrix = vec_iou(pred,gt)\n    print(iou_matrix)\n    # import pdb;pdb.set_trace()\n    score_matrix = custom_polyline_score(pred, gt, linewidth=1., metric='chamfer')\n    print(score_matrix)\n    fig, ax = plt.subplots()\n    for i in gt:\n        i = i.numpy()\n        plt.plot(i[:, 0], i[:, 1], 'o', color='red')\n        plt.plot(i[:, 0], i[:, 1], '-', color='red')\n\n        dilated = LineString(i).buffer(1, cap_style=CAP_STYLE.round, join_style=JOIN_STYLE.round)\n        patch1 = PolygonPatch(dilated, fc='red', ec='red', alpha=0.5, zorder=-1)\n        ax.add_patch(patch1)\n\n    for i in pred:\n        i = i.numpy()\n        plt.plot(i[:, 0], i[:, 1], 'o', color='blue')\n        plt.plot(i[:, 0], i[:, 1], '-', color='blue')\n\n        dilated = LineString(i).buffer(1, cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre)\n        patch1 = PolygonPatch(dilated, fc='blue', ec='blue', alpha=0.5, zorder=-1)\n        ax.add_patch(patch1)\n\n\n    ax.axis('equal')\n\n\n    plt.savefig('test3.png')    "
  },
  {
    "path": "mmdet3d/datasets/nuscenes_dataset.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\n\n# Copyright (c) OpenMMLab. All rights reserved.\nimport tempfile\nimport copy\nfrom os import path as osp\nimport os\nimport mmcv\nimport sys\nimport numpy as np\nimport pyquaternion\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\nfrom .utils import nuscenes_get_rt_matrix\nfrom ..core import show_result\nfrom ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\nfrom .pipelines import Compose\nfrom tqdm import tqdm\nimport csv\nimport math\nimport torch\nfrom nuscenes.eval.common.utils import quaternion_yaw, Quaternion\n# from .vad_custom_nuscenes_eval import NuScenesEval_custom\nfrom nuscenes.eval.common.utils import center_distance\n# from projects.mmdet3d_plugin.models.utils.visual import save_tensor\nfrom mmcv.parallel import DataContainer as DC\nimport random\nfrom mmdet3d.core import LiDARInstance3DBoxes\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\n# from projects.mmdet3d_plugin.core.bbox.structures.nuscenes_box import CustomNuscenesBox\nfrom shapely import affinity, ops\nfrom shapely.geometry import LineString, box, MultiPolygon, MultiLineString\nfrom mmdet.datasets.pipelines import to_tensor\nfrom nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer\nfrom nuscenes.eval.detection.constants import DETECTION_NAMES\nfrom .vector_map import VectorizedLocalMap\n\n@DATASETS.register_module()\nclass NuScenesDataset(Custom3DDataset):\n    r\"\"\"NuScenes Dataset.\n\n    This class serves as the API for experiments on the NuScenes Dataset.\n\n    Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_\n    for data downloading.\n\n    Args:\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        data_root (str): Path of dataset root.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        load_interval (int, optional): Interval of loading the dataset. It is\n            used to uniformly sample the dataset. Defaults to 1.\n        with_velocity (bool, optional): Whether include velocity prediction\n            into the experiments. Defaults to True.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. 
Available options includes.\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        eval_version (bool, optional): Configuration version of evaluation.\n            Defaults to  'detection_cvpr_2019'.\n        use_valid_flag (bool, optional): Whether to use `use_valid_flag` key\n            in the info file as mask to filter gt_boxes and gt_names.\n            Defaults to False.\n        img_info_prototype (str, optional): Type of img information.\n            Based on 'img_info_prototype', the dataset will prepare the image\n            data info in the type of 'mmcv' for official image infos,\n            'bevdet' for BEVDet, and 'bevdet4d' for BEVDet4D.\n            Defaults to 'mmcv'.\n        multi_adj_frame_id_cfg (tuple[int]): Define the selected index of\n            reference adjcacent frames.\n        ego_cam (str): Specify the ego coordinate relative to a specified\n            camera by its name defined in NuScenes.\n            Defaults to None, which use the mean of all cameras.\n    \"\"\"\n    NameMapping = {\n        'movable_object.barrier': 'barrier',\n        'vehicle.bicycle': 'bicycle',\n        'vehicle.bus.bendy': 'bus',\n        'vehicle.bus.rigid': 'bus',\n        'vehicle.car': 'car',\n        'vehicle.construction': 'construction_vehicle',\n        'vehicle.motorcycle': 'motorcycle',\n        'human.pedestrian.adult': 'pedestrian',\n        'human.pedestrian.child': 'pedestrian',\n        'human.pedestrian.construction_worker': 'pedestrian',\n        'human.pedestrian.police_officer': 'pedestrian',\n        'movable_object.trafficcone': 'traffic_cone',\n        'vehicle.trailer': 'trailer',\n        'vehicle.truck': 'truck'\n    }\n    DefaultAttribute = {\n        'car': 'vehicle.parked',\n        'pedestrian': 'pedestrian.moving',\n        'trailer': 'vehicle.parked',\n        'truck': 'vehicle.parked',\n        'bus': 'vehicle.moving',\n        'motorcycle': 'cycle.without_rider',\n        'construction_vehicle': 'vehicle.parked',\n        'bicycle': 'cycle.without_rider',\n        'barrier': '',\n        'traffic_cone': '',\n    }\n    AttrMapping = {\n        'cycle.with_rider': 0,\n        'cycle.without_rider': 1,\n        'pedestrian.moving': 2,\n        'pedestrian.standing': 3,\n        'pedestrian.sitting_lying_down': 4,\n        'vehicle.moving': 5,\n        'vehicle.parked': 6,\n        'vehicle.stopped': 7,\n    }\n    AttrMapping_rev = [\n        'cycle.with_rider',\n        'cycle.without_rider',\n        'pedestrian.moving',\n        'pedestrian.standing',\n        'pedestrian.sitting_lying_down',\n        'vehicle.moving',\n        'vehicle.parked',\n        'vehicle.stopped',\n    ]\n    # https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa\n    ErrNameMapping = {\n        'trans_err': 'mATE',\n        'scale_err': 'mASE',\n        'orient_err': 'mAOE',\n        'vel_err': 'mAVE',\n        'attr_err': 'mAAE'\n    }\n    CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n               'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n               'barrier')\n    \n    TRACKING_CLASSES = 
['car', 'truck', 'bus', 'trailer',\n               'motorcycle', 'bicycle', 'pedestrian']\n\n    def __init__(self,\n                 ann_file=None,\n                 pipeline=None,\n                 data_root=None,\n                 classes=None,\n                 load_interval=1,\n                 with_velocity=True,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 eval_version='detection_cvpr_2019',\n                 use_valid_flag=False,\n                 img_info_prototype='mmcv',\n                 multi_adj_frame_id_cfg=None,\n                 occupancy_path='/mount/dnn_data/occupancy_2023/gts',\n                 ego_cam='CAM_FRONT',\n                 # SOLLOFusion\n                 use_sequence_group_flag=False,\n                 sequences_split_num=1,\n                 # MAP\n                 map_classes = ['divider', 'ped_crossing', 'boundary'],\n                 map_ann_file= '',\n                 point_cloud_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],\n                 map_eval_cfg=dict(),\n                 load_fut_bbox_info=False,\n                ):\n        self.load_interval = load_interval\n        self.use_valid_flag = use_valid_flag\n\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n        self.load_fut_bbox_info = load_fut_bbox_info\n        self.occupancy_path = occupancy_path\n        self.with_velocity = with_velocity\n        self.eval_version = eval_version\n        from nuscenes.eval.detection.config import config_factory\n\n        self.eval_detection_configs = config_factory(self.eval_version)\n        if self.modality is None:\n            self.modality = dict(\n                use_camera=False,\n                use_lidar=True,\n                use_radar=False,\n                use_map=False,\n                use_external=False,\n            )\n        self.map_eval_cfg = map_eval_cfg\n        self.map_ann_file = map_ann_file\n        self.MAPCLASSES = self.get_map_classes(map_classes)\n        self.NUM_MAPCLASSES = len(self.MAPCLASSES)\n        self.pc_range = point_cloud_range\n\n        self.img_info_prototype = img_info_prototype\n        self.multi_adj_frame_id_cfg = multi_adj_frame_id_cfg\n        self.ego_cam = ego_cam\n        self.nusc = None\n\n        # SOLOFusion\n        self.use_sequence_group_flag = use_sequence_group_flag\n        self.sequences_split_num = sequences_split_num\n        # sequences_split_num splits eacgh sequence into sequences_split_num parts.\n        # if self.test_mode:\n        #     assert self.sequences_split_num == 1\n        if self.use_sequence_group_flag:\n            self._set_sequence_group_flag() # Must be called after load_annotations b/c load_annotations does sorting.\n\n\n\n    def get_cat_ids(self, idx):\n        \"\"\"Get category distribution of single scene.\n\n        Args:\n            idx (int): Index of the data_info.\n\n        Returns:\n            dict[list]: for each category, if the current scene\n                contains such boxes, store a list containing idx,\n                otherwise, store empty list.\n        \"\"\"\n        info = self.data_infos[idx]\n        if self.use_valid_flag:\n            mask = info['valid_flag']\n  
          gt_names = set(info['gt_names'][mask])\n        else:\n            gt_names = set(info['gt_names'])\n\n        cat_ids = []\n        for name in gt_names:\n            if name in self.CLASSES:\n                cat_ids.append(self.cat2id[name])\n        return cat_ids\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations sorted by timestamps.\n        \"\"\"\n        data = mmcv.load(ann_file, file_format='pkl')\n\n        data_infos = data['infos'][::self.load_interval]\n        self.metadata = data['metadata']\n\n        self.version = self.metadata['version']\n        if len(data_infos) < 100:\n            self.version = 'v1.0-mini'\n        return data_infos\n\n\n    def _set_sequence_group_flag(self):\n        \"\"\"\n        Set each sequence to be a different group\n        \"\"\"\n           \n        res = []\n        curr_sequence = 0\n        for idx in range(len(self.data_infos)):\n            if idx != 0 and len(self.data_infos[idx]['prev']) == 0:\n                # Not first frame and # of sweeps is 0 -> new sequence\n                curr_sequence += 1\n            res.append(curr_sequence)\n        self.flag = np.array(res, dtype=np.int64)\n        if self.sequences_split_num != 1:\n            if self.sequences_split_num == 'all':\n                self.flag = np.array(range(len(self.data_infos)), dtype=np.int64)\n            else:\n                bin_counts = np.bincount(self.flag)\n                new_flags = []\n                curr_new_flag = 0\n                for curr_flag in range(len(bin_counts)):\n                    curr_sequence_length = np.array(\n                        list(range(0, \n                                bin_counts[curr_flag], \n                                math.ceil(bin_counts[curr_flag] / self.sequences_split_num)))\n                        + [bin_counts[curr_flag]])\n                    for sub_seq_idx in (curr_sequence_length[1:] - curr_sequence_length[:-1]):\n                        for _ in range(sub_seq_idx):\n                            new_flags.append(curr_new_flag)\n                        curr_new_flag += 1\n\n                assert len(new_flags) == len(self.flag)\n                assert len(np.bincount(new_flags)) == len(np.bincount(self.flag)) * self.sequences_split_num\n                self.flag = np.array(new_flags, dtype=np.int64)\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - sweeps (list[dict]): Infos of sweeps.\n                - timestamp (float): Sample timestamp.\n                - img_filename (str, optional): Image filename.\n                - lidar2img (list[np.ndarray], optional): Transformations\n                    from lidar to different cameras.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = copy.deepcopy(self.data_infos[index])\n        # standard protocol modified from SECOND.Pytorch\n        input_dict = dict(\n            index=index,\n            sample_idx=info['token'],\n            pts_filename=info['lidar_path'],\n            sweeps=info['sweeps'],\n            scene_name=info['scene_name'],\n            timestamp=info['timestamp'] / 1e6,\n            lidarseg_filename=info.get('lidarseg_filename', 'None')\n        )\n\n        if 'instance_inds' in info.keys():\n            assert len(info['instance_inds']) == len(info['valid_flag'])\n            if len(info['instance_inds']) > 0:\n                input_dict['instance_inds'] = np.array(info['instance_inds'])[info['valid_flag']]\n            else:\n                input_dict['instance_inds'] = np.array(info['instance_inds'])\n\n        if 'ann_infos' in info:\n            input_dict['ann_infos'] = info['ann_infos']\n\n        if self.modality['use_camera']:\n            if self.img_info_prototype == 'mmcv':\n                image_paths = []\n                lidar2img_rts = []\n                cam_positions = []\n\n                for cam_type, cam_info in info['cams'].items():\n                    image_paths.append(cam_info['data_path'])\n                    # obtain lidar to image transformation matrix\n                    lidar2cam_r = np.linalg.inv(\n                        cam_info['sensor2lidar_rotation'])\n                    lidar2cam_t = cam_info[\n                        'sensor2lidar_translation'] @ lidar2cam_r.T\n                    lidar2cam_rt = np.eye(4)\n                    lidar2cam_rt[:3, :3] = lidar2cam_r.T\n                    lidar2cam_rt[3, :3] = -lidar2cam_t\n                    intrinsic = cam_info['cam_intrinsic']\n                    viewpad = np.eye(4)\n                    viewpad[:intrinsic.shape[0], :intrinsic.\n                            shape[1]] = intrinsic\n                    lidar2img_rt = (viewpad @ lidar2cam_rt.T)\n                    lidar2img_rts.append(lidar2img_rt)\n                    # camera origin expressed in the lidar frame\n                    cam_position = np.linalg.inv(lidar2cam_rt.T) @ np.array([0., 0., 0., 1.]).reshape([4, 1])\n                    cam_positions.append(cam_position.flatten()[:3])\n\n                input_dict.update(\n                    dict(\n                        img_filename=image_paths,\n                        lidar2img=lidar2img_rts,\n                    ))\n\n                if not self.test_mode:\n                    annos = self.get_ann_info(index)\n                    input_dict['ann_info'] = annos\n            else:\n                assert 'bevdet' in self.img_info_prototype\n                input_dict.update(dict(curr=info))\n                if '4d' in self.img_info_prototype:\n                    info_adj_list = self.get_adj_info(info, index)\n                    input_dict.update(dict(adjacent=info_adj_list))\n            if self.use_sequence_group_flag:\n                input_dict['sample_index'] = index\n                input_dict['sequence_group_idx'] = 
self.flag[index]\n                input_dict['start_of_sequence'] = index == 0 or self.flag[index - 1] != self.flag[index]\n                # Get a transformation matrix from current keyframe lidar to previous keyframe lidar\n                # if they belong to same sequence.\n                can_bus_info = info['gt_ego_lcf_feat']\n                input_dict['can_bus_info'] = can_bus_info\n                input_dict['nuscenes_get_rt_matrix'] = dict(\n                    lidar2ego_rotation = info['lidar2ego_rotation'],\n                    lidar2ego_translation = info['lidar2ego_translation'],\n                    ego2global_rotation = info['ego2global_rotation'],\n                    ego2global_translation = info['ego2global_translation'],\n                )\n\n                input_dict['ego_pose_inv'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                    info, info,\n                    \"global\", \"ego\"))\n                \n                input_dict['ego_pose'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                    info, info,\n                    \"ego\", \"global\"))\n                \n                \n\n                if not input_dict['start_of_sequence']:\n                    input_dict['curr_to_prev_lidar_rt'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                        info, self.data_infos[index - 1],\n                        \"lidar\", \"lidar\"))\n                    input_dict['prev_lidar_to_global_rt'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                        self.data_infos[index - 1], info,\n                        \"lidar\", \"global\")) # TODO: Note that global is same for all.\n                    input_dict['curr_to_prev_ego_rt'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                       info, self.data_infos[index - 1],\n                        \"ego\", \"ego\"))\n                else:\n                    input_dict['curr_to_prev_lidar_rt'] = torch.eye(4).float()\n                    input_dict['prev_lidar_to_global_rt'] = torch.FloatTensor(nuscenes_get_rt_matrix( \n                        info, info, \"lidar\", \"global\")\n                        )\n                    input_dict['curr_to_prev_ego_rt'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                        info, info,\n                        \"ego\", \"ego\"))\n                input_dict['global_to_curr_lidar_rt'] = torch.FloatTensor(nuscenes_get_rt_matrix(\n                    info, info,\n                    \"global\", \"lidar\"))\n                \n\n                if self.load_fut_bbox_info:\n                    fut_boxes_info, fut_labels_info = self.get_fut_bbox_info(info, index)\n                    input_dict['fut_boxes_info'] = fut_boxes_info\n                    input_dict['fut_labels_info'] = fut_labels_info\n                 \n        return input_dict\n\n    def get_fut_bbox_info(self, info, index):\n        fut_boxes_info = []\n        fut_labels_info = []\n        for select_id in range(1, 7):\n            select_id = min(index + select_id, len(self.data_infos)-1)\n            if not self.data_infos[select_id]['scene_token'] == info[\n                    'scene_token']:\n                fut_boxes_info.append([])\n                fut_labels_info.append([])\n            else:\n                fut_boxes_info.append(self.data_infos[select_id]['ann_infos']['gt_boxes_3d_in_global'])\n                fut_labels_info.append(self.data_infos[select_id]['ann_infos']['gt_labels_3d'])\n\n        return fut_boxes_info, fut_labels_info\n\n\n    
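    # Editor's note: a hedged sketch of how `multi_adj_frame_id_cfg` is consumed by\n    # `get_adj_info` below. The config is unpacked straight into `range(...)`, so it is\n    # assumed to be a (start, stop[, step]) tuple of frame offsets; e.g. a hypothetical\n    # (1, 9, 1) would gather the eight preceding keyframes (index - 1 ... index - 8),\n    # clamped to the dataset bounds and replaced by the current frame whenever the\n    # neighbour belongs to a different scene.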
def get_adj_info(self, info, index):\n        info_adj_list = []\n        for select_id in range(*self.multi_adj_frame_id_cfg):\n            if select_id == 0: continue\n            select_id = min(max(index - select_id, 0), len(self.data_infos)-1)\n\n            if not self.data_infos[select_id]['scene_token'] == info[\n                    'scene_token']:\n                info_adj_list.append(info)\n            else:\n                info_adj_list.append(self.data_infos[select_id])\n        return info_adj_list\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n        \"\"\"\n        info = self.data_infos[index]\n        # filter out bbox containing no points\n        if self.use_valid_flag:\n            mask = info['valid_flag']\n        else:\n            mask = info['num_lidar_pts'] > 0\n        gt_bboxes_3d = info['gt_boxes'][mask]\n        gt_names_3d = info['gt_names'][mask]\n        gt_labels_3d = []\n        for cat in gt_names_3d:\n            if cat in self.CLASSES:\n                gt_labels_3d.append(self.CLASSES.index(cat))\n            else:\n                gt_labels_3d.append(-1)\n        gt_labels_3d = np.array(gt_labels_3d)\n\n        if self.with_velocity:\n            gt_velocity = info['gt_velocity'][mask]\n            nan_mask = np.isnan(gt_velocity[:, 0])\n            gt_velocity[nan_mask] = [0.0, 0.0]\n            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)\n\n        # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be\n        # the same as KITTI (0.5, 0.5, 0)\n        gt_bboxes_3d = LiDARInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            gt_names=gt_names_3d)\n        return anns_results\n\n\n    def format_map_results(self, results, jsonfile_prefix=None):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n\n        Returns:\n            tuple: Returns (result_files, tmp_dir), where `result_files` is a \\\n                dict containing the json filepaths, `tmp_dir` is the temporal \\\n                directory created for saving json files when \\\n                `jsonfile_prefix` is not specified.\n        \"\"\"\n        if isinstance(results, dict):\n            results = results['map_results']\n        assert isinstance(results, list)\n        assert len(results) >= len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        # currently the output prediction results could be in two formats\n        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)\n        # 2. list of dict('pts_bbox' or 'img_bbox':\n        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))\n        # this is a workaround to enable evaluation of both formats on nuScenes\n        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449\n        if not ('pred_map' in results[0]):\n            result_files = self._format_map(results, jsonfile_prefix)\n        else:\n            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict\n            result_files = dict()\n            for name in ['pred_map']:\n                print(f'\\nFormating {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_files.update(\n                    {name: self._format_map(results_, tmp_file_)})\n        return result_files, tmp_dir\n\n    @classmethod\n    def get_map_classes(cls, map_classes=None):\n        \"\"\"Get class names of current dataset.\n\n        Args:\n            classes (Sequence[str] | str | None): If classes is None, use\n                default CLASSES defined by builtin dataset. If classes is a\n                string, take it as a file name. The file contains the name of\n                classes where each line contains one class name. If classes is\n                a tuple or list, override the CLASSES defined by the dataset.\n\n        Return:\n            list[str]: A list of class names.\n        \"\"\"\n        if map_classes is None:\n            return cls.MAPCLASSES\n\n        if isinstance(map_classes, str):\n            # take it as a file path\n            class_names = mmcv.list_from_file(map_classes)\n        elif isinstance(map_classes, (tuple, list)):\n            class_names = map_classes\n        else:\n            raise ValueError(f'Unsupported type {type(map_classes)} of map classes.')\n\n        return class_names\n\n    def _format_map(self, results, jsonfile_prefix=None, score_thresh=0.2):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. 
Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n\n        # assert self.map_ann_file is not None\n        map_pred_annos = {}\n        map_mapped_class_names = self.MAPCLASSES\n        processed_set = set()\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n            sample_id = det.get('index', sample_id)\n            if sample_id in processed_set: continue\n            processed_set.add(sample_id)\n            map_pred_anno = {}\n            vecs = output_to_vecs(det)\n            sample_token = self.data_infos[sample_id]['token']\n            map_pred_anno['sample_token'] = sample_token\n            pred_vec_list=[]\n            for i, vec in enumerate(vecs):\n                name = map_mapped_class_names[vec['label']]\n                anno = dict(\n                    sample_token=sample_token,\n                    pts=vec['pts'],\n                    pts_num=len(vec['pts']),\n                    cls_name=name,\n                    type=vec['label'],\n                    confidence_level=vec['score'])\n                pred_vec_list.append(anno)\n                # annos.append(nusc_anno)\n            # nusc_annos[sample_token] = annos\n            map_pred_anno['vectors'] = pred_vec_list\n            map_pred_annos[sample_token] = map_pred_anno\n        # self._format_map_gt()\n\n        if not os.path.exists(self.map_ann_file):\n            self._format_map_gt()\n        else:\n            print(f'{self.map_ann_file} exist, not update')\n        # with open(self.map_ann_file,'r') as f:\n        #     GT_anns = json.load(f)\n        # gt_annos = GT_anns['GTs']\n\n        nusc_submissions = {\n            'meta': self.modality,\n            'map_results': map_pred_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'map_results_nusc.json')\n        print('Map Results writes to', res_path)\n        mmcv.dump(nusc_submissions, res_path)\n        return res_path\n\n    def vectormap_pipeline(self, location, ego2global_translation, patch_angle, flip_dx=False, flip_dy=False):\n        '''\n        `example` type: <class 'dict'>\n            keys: 'img_metas', 'gt_bboxes_3d', 'gt_labels_3d', 'img';\n                  all keys type is 'DataContainer';\n                  'img_metas' cpu_only=True, type is dict, others are false;\n                  'gt_labels_3d' shape torch.size([num_samples]), stack=False,\n                                padding_value=0, cpu_only=False\n                  'gt_bboxes_3d': stack=False, cpu_only=True\n        '''\n\n        anns_results = self.vector_map.gen_vectorized_samples(\n            location, ego2global_translation, patch_angle, flip_dx, flip_dy\n        )\n        \n        '''\n        anns_results, type: dict\n            'gt_vecs_pts_loc': list[num_vecs], vec with num_points*2 coordinates\n            'gt_vecs_pts_num': list[num_vecs], vec with num_points\n            'gt_vecs_label': list[num_vecs], vec with cls index\n        '''\n        gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])\n        from .vector_map import LiDARInstanceLines\n        if isinstance(anns_results['gt_vecs_pts_loc'], LiDARInstanceLines):\n            gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']\n        else:\n            gt_vecs_pts_loc = to_tensor(anns_results['gt_vecs_pts_loc'])\n            try:\n                gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)\n            except:\n                # 
empty tensor, will be passed in train, \n                # but we preserve it for test\n                gt_vecs_pts_loc = gt_vecs_pts_loc\n\n        return dict(\n            map_gt_labels_3d = DC(gt_vecs_label, cpu_only=False),\n            map_gt_bboxes_3d = DC(gt_vecs_pts_loc, cpu_only=True),\n        )\n\n    def _format_map_gt(self):\n        gt_annos = []\n        print('Start to convert gt map format...')\n        # assert self.map_ann_file is not None\n\n        if  (not os.path.exists(self.map_ann_file)) :\n\n            patch_h, patch_w = self.map_eval_cfg['region']\n            patch_h = min(patch_h, 50)\n            self.vector_map = VectorizedLocalMap(self.data_root, \n                            patch_size=(patch_h, patch_w), map_classes=self.MAPCLASSES, \n                            fixed_ptsnum_per_line=20,\n                            padding_value=-10000)\n\n            dataset_length = len(self)\n            prog_bar = mmcv.ProgressBar(dataset_length)\n            mapped_class_names = self.MAPCLASSES\n            for sample_id in range(dataset_length):\n                sample_token = self.data_infos[sample_id]['token']\n                gt_anno = {}\n                gt_anno['sample_token'] = sample_token\n                # gt_sample_annos = []\n                gt_sample_dict = {}\n                \n                ego_pose = torch.FloatTensor(nuscenes_get_rt_matrix(\n                    self.data_infos[sample_id], self.data_infos[sample_id],\n                    \"ego\", \"global\"))\n\n                ego2global_translation = list(ego_pose[:3,3].numpy())\n                v = np.dot(ego_pose[:3,:3].numpy(), np.array([1, 0, 0]))\n                yaw = np.arctan2(v[1], v[0])\n                patch_angle = yaw / np.pi * 180\n                location = self.data_infos[sample_id]['map_location']\n\n                gt_sample_dict =  self.vectormap_pipeline(location, ego2global_translation, patch_angle)\n                gt_labels = gt_sample_dict['map_gt_labels_3d'].data.numpy()\n                gt_vecs = gt_sample_dict['map_gt_bboxes_3d'].data.instance_list\n                gt_vec_list = []\n                for i, (gt_label, gt_vec) in enumerate(zip(gt_labels, gt_vecs)):\n                    name = mapped_class_names[gt_label]\n                    anno = dict(\n                        pts=np.array(list(gt_vec.coords)),\n                        pts_num=len(list(gt_vec.coords)),\n                        cls_name=name,\n                        type=gt_label,\n                    )\n                    gt_vec_list.append(anno)\n                gt_anno['vectors']=gt_vec_list\n                gt_annos.append(gt_anno)\n\n                prog_bar.update()\n            nusc_submissions = {\n                'GTs': gt_annos\n            }\n            print('\\n GT anns writes to', self.map_ann_file)\n            mmcv.dump(nusc_submissions, self.map_ann_file)\n        else:\n            print(f'{self.map_ann_file} exist, not update')\n\n\n    def _evaluate_single(self,\n                         result_path,\n                         logger=None,\n                         metric='bbox',\n                         result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in nuScenes protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. 
Default: None.\n            metric (str, optional): Metric name used for evaluation.\n                Default: 'bbox'.\n            result_name (str, optional): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n        from nuscenes import NuScenes\n        from nuscenes.eval.detection.evaluate import NuScenesEval\n\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n\n        self.nusc = NuScenes(\n            version=self.version, dataroot=self.data_root, verbose=False)\n        eval_set_map = {\n            'v1.0-mini': 'mini_val',\n            'v1.0-trainval': 'val',\n        }\n        nusc_eval = NuScenesEval(\n            self.nusc,\n            config=self.eval_detection_configs,\n            result_path=result_path,\n            eval_set=eval_set_map[self.version],\n            output_dir=output_dir,\n            verbose=False)\n        nusc_eval.main(render_curves=False)\n\n        # record metrics\n        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))\n        detail = dict()\n        metric_prefix = f'{result_name}_NuScenes'\n        for name in self.CLASSES:\n            for k, v in metrics['label_aps'][name].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val\n            for k, v in metrics['label_tp_errors'][name].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val\n            for k, v in metrics['tp_errors'].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}'.format(metric_prefix,\n                                      self.ErrNameMapping[k])] = val\n\n        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']\n        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']\n        return detail\n\n    def format_results(self, results, jsonfile_prefix=None):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n\n        Returns:\n            tuple: Returns (result_files, tmp_dir), where `result_files` is a\n                dict containing the json filepaths, `tmp_dir` is the temporal\n                directory created for saving json files when\n                `jsonfile_prefix` is not specified.\n        \"\"\"\n        assert isinstance(results, list), 'results must be a list'\n        assert len(results) >= len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        # currently the output prediction results could be in two formats\n        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)\n        # 2. 
list of dict('pts_bbox' or 'img_bbox':\n        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))\n        # this is a workaround to enable evaluation of both formats on nuScenes\n        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449\n        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):\n            result_files = self._format_bbox(results, jsonfile_prefix)\n        else:\n            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict\n            result_files = dict()\n            for name in ['pts_bbox']:\n                print(f'\\nFormatting bboxes of {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_files.update(\n                    {name: self._format_bbox(results_, tmp_file_)})\n        return result_files, tmp_dir\n\n    # def format_motion_results(self, results, jsonfile_prefix=None):\n    #     \"\"\"Format the results to json (standard format for COCO evaluation).\n\n    #     Args:\n    #         results (list[dict]): Testing results of the dataset.\n    #         jsonfile_prefix (str): The prefix of json files. It includes\n    #             the file path and the prefix of filename, e.g., \"a/b/prefix\".\n    #             If not specified, a temp file will be created. Default: None.\n\n    #     Returns:\n    #         tuple: Returns (result_files, tmp_dir), where `result_files` is a\n    #             dict containing the json filepaths, `tmp_dir` is the temporal\n    #             directory created for saving json files when\n    #             `jsonfile_prefix` is not specified.\n    #     \"\"\"\n    #     assert isinstance(results, list), 'results must be a list'\n    #     assert len(results) >= len(self), (\n    #         'The length of results is not equal to the dataset len: {} != {}'.\n    #         format(len(results), len(self)))\n\n    #     if jsonfile_prefix is None:\n    #         tmp_dir = tempfile.TemporaryDirectory()\n    #         jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n    #     else:\n    #         tmp_dir = None\n\n    #     # currently the output prediction results could be in two formats\n    #     # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)\n    #     # 2. 
list of dict('pts_bbox' or 'img_bbox':\n    #     #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))\n    #     # this is a workaround to enable evaluation of both formats on nuScenes\n    #     # refer to https://github.com/open-mmlab/mmdetection3d/issues/449\n    #     if not ('pred_motion' in results[0]):\n    #         result_files = self._format_motion_bbox(results, jsonfile_prefix)\n    #     else:\n    #         # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict\n    #         result_files = dict()\n    #         for name in ['pred_motion']:\n    #             print(f'\\nFormating bboxes of {name}')\n    #             results_ = [out[name] for out in results]\n    #             tmp_file_ = osp.join(jsonfile_prefix, name)\n    #             result_files.update(\n    #                 {name: self._format_motion_bbox(results_, tmp_file_)})\n    #     return result_files, tmp_dir\n\n    def evaluate(self, results,\n                       logger=None,\n                        metric='bbox',\n                        jsonfile_prefix='test',\n                        result_names=['pts_bbox'],\n                        show=False,\n                        out_dir=None,\n                        pipeline=None,\n                        save=False,\n                        ):\n            results_dict = {}\n            mmcv.mkdir_or_exist(jsonfile_prefix)\n\n            if results[0].get('pred_ego_traj', None) is not None:\n                results_dict.update(\n                    self.evaluate_ego_traj(\n                        results,\n                        jsonfile_prefix=jsonfile_prefix,\n                        logger=logger\n                    )\n                )\n            if results[0].get('pred_occupancy', None) is not None:\n                results_dict.update(self.evaluate_occupancy(results, show_dir=jsonfile_prefix, save=save))\n                \n            if results[0].get('iou', None) is not None:\n                results_dict.update(self.evaluate_mask(results))\n            \n            if results[0].get('pred_map', None) is not None:\n                results_dict.update(self.evaluate_map(results, jsonfile_prefix=jsonfile_prefix, out_dir=out_dir))\n\n\n            if results[0].get('pts_bbox', None) is not None:\n\n                results_dict.update(self.evaluate_bbox(results, logger=logger,\n                        metric=metric,\n                        jsonfile_prefix=jsonfile_prefix,\n                        result_names=result_names,\n                        show=show,\n                        out_dir=out_dir,\n                        pipeline=pipeline))\n                \n                \"\"\"if the output information has no tracking info, this func dose nothing\"\"\"\n                results_dict.update(self.evaluate_tracking(results, logger=logger,\n                        metric=metric,\n                        jsonfile_prefix=jsonfile_prefix,\n                        result_names=result_names,\n                        show=show,\n                        out_dir=out_dir,\n                        pipeline=pipeline))\n\n            with open(osp.join(jsonfile_prefix, 'results.csv'), 'w', newline='') as f:\n                writer = csv.writer(f)\n                for key in results_dict.keys():\n                    writer.writerow([key, results_dict[key]])\n\n            return results_dict\n\n\n\n    def evaluate_ego_traj(self, results, jsonfile_prefix=None, logger=None):\n        print('Start to convert traj format...')\n        
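        # Editor's note: `res` and `res_c` below are (1, 6) accumulators holding, per future\n        # waypoint, the summed L2 error and the count of valid samples; entries with\n        # l2_dist < 0 are masked out. Assuming the usual 2 Hz nuScenes keyframes, the six\n        # waypoints span the 3 s planning horizon, and the `res / res_c` printed later is\n        # the per-waypoint mean L2 distance.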
l2_dist_list = []\n        res = torch.zeros(1, 6)\n        res_c = torch.zeros(1, 6)\n        processed_set = set()\n        ego_trajs_in_global_dict = dict(\n            trajs=dict(),\n            map_lane=dict(),\n            map_label=dict(),\n        )\n        c = 0\n        \n        gen_global_map = False\n        if gen_global_map:\n            self.vector_map = VectorizedLocalMap(self.data_root, \n                            patch_size=(400, 400), map_classes=self.MAPCLASSES, \n                            fixed_ptsnum_per_line=200,\n                            padding_value=-10000)\n        \n        for sample_id, traj in enumerate(mmcv.track_iter_progress(results)):\n            sample_id = traj['pred_ego_traj']['index']\n            l2_dist = traj['pred_ego_traj']['metric_dict'].pop('l2_dist')\n            if sample_id in processed_set: continue\n            # if traj['pred_ego_traj']['gt_ego_fut_cmd'][-1] == 1: continue\n            processed_set.add(sample_id)\n            c += 1\n            ego_trajs_in_global = traj['pred_ego_traj']['ego_trajs_in_global'].numpy()\n            ego_trajs_in_global_dict['trajs'][traj['pred_ego_traj']['index_w_scene']] = ego_trajs_in_global\n            mask = l2_dist >= 0\n            res[mask] += l2_dist[mask]\n            res_c[mask] += 1\n            info = self.data_infos[sample_id]\n            # print(traj['pred_ego_traj']['index_w_scene'], info['prev']=='', sample_id, traj['pred_ego_traj']['index'])\n            if gen_global_map and info['prev']=='':\n                \n                ego_pose = torch.FloatTensor(nuscenes_get_rt_matrix(\n                        self.data_infos[sample_id], self.data_infos[sample_id],\n                        \"ego\", \"global\"))\n                ego2global_translation = list(ego_pose[:3,3].numpy())\n                map_res = self.vectormap_pipeline(info['map_location'], ego2global_translation, 0)\n                lanes = map_res['map_gt_bboxes_3d'].data.fixed_num_sampled_points.cpu().numpy() \n                lanes = lanes + ego2global_translation[:2]\n                lanes_label = map_res['map_gt_labels_3d'].data.cpu().numpy()\n                ego_trajs_in_global_dict['map_lane'][traj['pred_ego_traj']['index_w_scene']] = lanes\n                # results[sample_id]['pred_map']['gt_lane_in_global']\n                ego_trajs_in_global_dict['map_label'][traj['pred_ego_traj']['index_w_scene']] = lanes_label\n                #     results[sample_id]['pred_map']['gt_lane_label']\n\n        print('valid: ', c)\n\n        l2_dist = (res/res_c).cpu().numpy()\n       \n        print('++++++++++++++')\n        print('l2_dist')\n        print(l2_dist)\n        print('--------------')\n\n\n        metric_dict = [None, None, None]\n\n        for i in range(3):\n            num_valid = 0\n            processed_set = set()\n            for sample_id, traj in enumerate(mmcv.track_iter_progress(results)):\n                sample_id = traj['pred_ego_traj']['index']\n                if sample_id in processed_set: continue\n                if i == 1 and traj['pred_ego_traj']['gt_ego_fut_cmd'][-1] == 1: continue\n                if i == 2 and traj['pred_ego_traj']['gt_ego_fut_cmd'][-1] != 1: continue\n                processed_set.add(sample_id)\n                if not traj['pred_ego_traj']['metric_dict']['fut_valid_flag']: continue\n                else: num_valid += 1\n\n                if metric_dict[i] is None:\n                    metric_dict[i] = copy.deepcopy(traj['pred_ego_traj']['metric_dict'])\n                
else:\n                    for k in traj['pred_ego_traj']['metric_dict'].keys():\n                        metric_dict[i][k] += traj['pred_ego_traj']['metric_dict'][k]\n\n            print('valid_after: ', num_valid, i)\n            for k in metric_dict[i]:\n                metric_dict[i][k] = str(metric_dict[i][k] / num_valid)\n                print(\"{}:{}:{}\".format(i, k, metric_dict[i][k]))\n\n        res_path = osp.join(jsonfile_prefix, 'results_nusc_planning.json')\n        print('Results writes to', res_path)\n    \n        mmcv.dump(ego_trajs_in_global_dict, res_path)\n        metric_dict[0].update(self.smoothness(ego_trajs_in_global_dict['trajs']))\n        #     l2_dist_1s = traj['pred_ego_traj']['metric_dict']['plan_L2_1s']\n        #     l2_dist_2s = traj['pred_ego_traj']['metric_dict']['plan_L2_2s']\n        #     l2_dist_3s = traj['pred_ego_traj']['metric_dict']['plan_L2_3s']\n        #     res2[0] = res2[0] + l2_dist_1s\n        #     res2[1] = res2[1] + l2_dist_2s\n        #     res2[2] = res2[2] + l2_dist_3s\n        #     res2_c += 1\n        # l2_dist_v2 = res2/res2_c\n        \n        # print('++++++++++++++')\n        # print('l2_dist_v2')\n        # print(l2_dist_v2)\n        avg_l2 = 0\n        avg_col = 0\n        for i in range(1,4):\n            avg_l2 += float(metric_dict[0][f'plan_L2_{i}s'])\n            avg_col += float(metric_dict[0][f'plan_obj_box_col_{i}s'])\n        avg_l2 /= 3\n        avg_col /= 3\n        print(f'avg_l2 {avg_l2}, avg_col {avg_col}')\n        print('--------------')\n        # metric_dict['l2_dist'] = l2_dist\n        metric_dict[0]['avg_l2'] = avg_l2\n        metric_dict[0]['avg_col'] = avg_col\n        return metric_dict[0]\n\n    def smoothness(self, data):\n        keys = list(data.keys())\n        # print(keys)\n        new_keys = []\n        for key in keys:\n            s = key.split(\"-\")\n            new_keys.append([int(s[1]),int(s[2])])\n\n        new_keys=sorted(new_keys,key=(lambda x:(x[0], x[1])))\n        sorted_keys = []\n        for key in new_keys:\n            v = ['scene',  str(key[0]).zfill(4), str(key[1]) ]\n            k='-'.join(v)\n            sorted_keys.append(k)\n\n\n        all_scene_keys=[]\n        key='-'.join(sorted_keys[0].split(\"-\")[:2])\n        scene=[]\n\n        for k in sorted_keys:\n            if(key in k):\n                # print(True)\n                scene.append(k)\n            else:\n                s =k.split(\"-\")\n                key='-'.join(s[:2])\n                all_scene_keys.append(scene)\n                scene=[k]\n\n        #tranform raw data\n        new_data={}\n        for keys in all_scene_keys:\n            l = len(keys)\n            for i in range(l):\n                val = []\n                index = i\n                for j in range(i+1):\n                    if index>6:\n                        index-=1\n                    else:\n                        val.append(data[keys[j]][index])\n                        index-=1\n                new_data[keys[i]]=val\n\n        #compute mean and var\n        res = {\n            'stable_mean_distance_1s': [],\n            'stable_variance_distance_1s': [],\n            'stable_mean_distance_2s': [],\n            'stable_variance_distance_2s': [],\n            'stable_mean_distance_3s': [],\n            'stable_variance_distance_3s': [],\n        }\n        \n        for key, value in new_data.items():\n            #filter unstable data\n            if(len(value)!=7):\n                continue\n            assert 
len(value)==7\n            #compute mean\n            for window in [1, 2, 3]:\n                gt = value[-1]\n                pred = value[6-window*2:-1]\n                #compute var\n                data_array = np.array(pred)\n\n                distances = np.linalg.norm(data_array - gt, axis=1)\n                mean_distance = np.mean(distances)\n                variance_distance = np.var(distances)\n                res[f'stable_mean_distance_{window}s'].append(mean_distance)\n                res[f'stable_variance_distance_{window}s'].append(variance_distance)\n        \n        for key in res.keys():\n            res[key] = np.mean(res[key])\n        print(res)\n        return res\n    \n    def _format_bbox(self, results, jsonfile_prefix=None):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n        nusc_annos = {}\n        mapped_class_names = self.CLASSES\n        print('Start to convert detection format...')\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n            boxes = det['boxes_3d'].tensor.numpy()\n            scores = det['scores_3d'].numpy()\n            labels = det['labels_3d'].numpy()\n            sample_id = det.get('index', sample_id)\n\n            sample_token = self.data_infos[sample_id]['token']\n\n            trans = self.data_infos[sample_id]['cams'][\n                self.ego_cam]['ego2global_translation']\n            rot = self.data_infos[sample_id]['cams'][\n                self.ego_cam]['ego2global_rotation']\n            rot = pyquaternion.Quaternion(rot)\n            annos = list()\n            for i, box in enumerate(boxes):\n                name = mapped_class_names[labels[i]]\n                center = box[:3]\n                wlh = box[[4, 3, 5]]\n                box_yaw = box[6]\n                box_vel = box[7:].tolist()\n                box_vel.append(0)\n                quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw)\n                nusc_box = NuScenesBox(center, wlh, quat, velocity=box_vel)\n                nusc_box.rotate(rot)\n                nusc_box.translate(trans)\n                if np.sqrt(nusc_box.velocity[0]**2 +\n                           nusc_box.velocity[1]**2) > 0.2:\n                    if name in [\n                            'car',\n                            'construction_vehicle',\n                            'bus',\n                            'truck',\n                            'trailer',\n                    ]:\n                        attr = 'vehicle.moving'\n                    elif name in ['bicycle', 'motorcycle']:\n                        attr = 'cycle.with_rider'\n                    else:\n                        attr = self.DefaultAttribute[name]\n                else:\n                    if name in ['pedestrian']:\n                        attr = 'pedestrian.standing'\n                    elif name in ['bus']:\n                        attr = 'vehicle.stopped'\n                    else:\n                        attr = self.DefaultAttribute[name]\n                nusc_anno = dict(\n                    sample_token=sample_token,\n                    translation=nusc_box.center.tolist(),\n     
               size=nusc_box.wlh.tolist(),\n                    rotation=nusc_box.orientation.elements.tolist(),\n                    velocity=nusc_box.velocity[:2],\n                    detection_name=name,\n                    detection_score=float(scores[i]),\n                    attribute_name=attr,\n                )\n                annos.append(nusc_anno)\n            # other views results of the same frame should be concatenated\n            if sample_token in nusc_annos:\n                pass\n                # nusc_annos[sample_token].extend(annos)\n            else:\n                nusc_annos[sample_token] = annos\n        nusc_submissions = {\n            'meta': self.modality,\n            'results': nusc_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')\n        print('Results writes to', res_path)\n        mmcv.dump(nusc_submissions, res_path)\n        return res_path\n\n\n    def evaluate_tracking(self,\n                          results,\n                          metric='bbox',\n                          logger=None,\n                          jsonfile_prefix=None,\n                          result_names=['pts_bbox'],\n                          show=False,\n                          out_dir=None,\n                          pipeline=None):\n        \"\"\"Evaluation in nuScenes protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str]): Metrics to be evaluated.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        result_files, tmp_dir, with_motion = self.format_tracking_results(results, jsonfile_prefix)\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:                    \n                print('Evaluating tracking bboxes of {}'.format(name))\n                ret_dict = self._evaluate_tracking_single(result_files[name])\n                results_dict.update(ret_dict)\n                if with_motion:\n                    print('Evaluating motion bboxes of {}'.format(name))\n                    ret_dict = self._evaluate_motion_single(result_files[name])\n                    results_dict.update(ret_dict)\n\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_tracking_single(result_files)\n            if with_motion:\n                print('Evaluating motion bboxes of')\n                ret_dict = self._evaluate_motion_single(result_files)\n                results_dict.update(ret_dict)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show:\n            self.show(results, out_dir, pipeline=pipeline)\n        return results_dict\n\n    def format_tracking_results(self, results, jsonfile_prefix=None):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n\n        Returns:\n            tuple: Returns (result_files, tmp_dir), where `result_files` is a\n                dict containing the json filepaths, `tmp_dir` is the temporal\n                directory created for saving json files when\n                `jsonfile_prefix` is not specified.\n        \"\"\"\n        assert isinstance(results, list), 'results must be a list'\n        assert len(results) >= len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        # currently the output prediction results could be in two formats\n        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)\n        # 2. 
list of dict('pts_bbox' or 'img_bbox':\n        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))\n        # this is a workaround to enable evaluation of both formats on nuScenes\n        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449\n        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):\n            result_files, with_motion = self._format_tracking_bbox(results, jsonfile_prefix)\n        else:\n            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict\n            result_files = dict()\n            for name in ['pts_bbox']:\n                print(f'\\nFormating tracking bboxes of {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_file, with_motion = self._format_tracking_bbox(results_, tmp_file_)\n                result_files.update(\n                    {name: result_file})\n        return result_files, tmp_dir, with_motion\n\n    def _format_tracking_bbox(self, results, jsonfile_prefix=None):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n        nusc_annos = {}\n        mapped_class_names = self.CLASSES\n        print('Start to convert tracking format...')\n        processed_set = set()\n        with_motion = False\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n            boxes = det['boxes_3d'].tensor.numpy()\n            # scores = det['scores_3d'].numpy()\n            labels = det['labels_3d'].numpy()\n            sample_id = det.get('index', sample_id)\n            if 'track_scores' not in det:\n                print('no tracking info')\n                return None, with_motion\n            tracking_scores = det['track_scores'].numpy()\n            \n            obj_idxes = det['obj_idxes'].numpy()\n            if sample_id in processed_set: continue\n            processed_set.add(sample_id)\n            sample_token = self.data_infos[sample_id]['token']\n  \n            trans = self.data_infos[sample_id]['cams'][\n                self.ego_cam]['ego2global_translation']\n            rot = self.data_infos[sample_id]['cams'][\n                self.ego_cam]['ego2global_rotation']\n            rot = pyquaternion.Quaternion(rot)\n            annos = list()\n\n            for i, box in enumerate(boxes):\n                if tracking_scores[i] < 0: continue\n                name = mapped_class_names[labels[i]]\n                if name not in self.TRACKING_CLASSES: continue\n                center = box[:3]\n                wlh = box[[4, 3, 5]]\n                box_yaw = box[6]\n                box_vel = box[7:].tolist()\n                box_vel.append(0)\n                quat =  pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw)\n                nusc_box = NuScenesBox(center, wlh, quat, velocity=box_vel)\n                nusc_box.rotate(rot)\n                nusc_box.translate(trans)\n                if np.sqrt(nusc_box.velocity[0]**2 +\n                           nusc_box.velocity[1]**2) > 0.2:\n                    if name in [\n                            'car',\n                            
'construction_vehicle',\n                            'bus',\n                            'truck',\n                            'trailer',\n                    ]:\n                        attr = 'vehicle.moving'\n                    elif name in ['bicycle', 'motorcycle']:\n                        attr = 'cycle.with_rider'\n                    else:\n                        attr = self.DefaultAttribute[name]\n                else:\n                    if name in ['pedestrian']:\n                        attr = 'pedestrian.standing'\n                    elif name in ['bus']:\n                        attr = 'vehicle.stopped'\n                    else:\n                        attr = self.DefaultAttribute[name]\n                nusc_anno = dict(\n                    sample_token=sample_token,\n                    translation=nusc_box.center.tolist(),\n                    size=nusc_box.wlh.tolist(),\n                    rotation=nusc_box.orientation.elements.tolist(),\n                    velocity=nusc_box.velocity[:2],\n                    tracking_name=name,\n                    detection_name=name,\n                    detection_score=float(tracking_scores[i]),\n                    attribute_name=attr,\n                    tracking_score=float(tracking_scores[i]),\n                    tracking_id=obj_idxes[i]\n                )\n                if 'motion_traj' in det:\n                    with_motion = True\n                    nusc_anno['traj'] = det['motion_traj'][i]\n                    nusc_anno['traj_scores'] = det['motion_cls'][i]\n                annos.append(nusc_anno)\n            # other views results of the same frame should be concatenated\n            if sample_token in nusc_annos:\n                pass\n                # nusc_annos[sample_token].extend(annos)\n            else:\n                nusc_annos[sample_token] = annos\n        nusc_submissions = {\n            'meta': self.modality,\n            'results': nusc_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'results_nusc_tracking.json')\n        print('Results writes to', res_path)\n        mmcv.dump(nusc_submissions, res_path)\n        return res_path, with_motion\n\n\n\n    def _evaluate_motion_single(self,\n                                  result_path,\n                                  logger=None,\n                                  metric='bbox',\n                                  result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in nuScenes protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            metric (str): Metric name used for evaluation. 
Default: 'bbox'.\n            result_name (str): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n        if result_path is None: return {}\n        from nuscenes import NuScenes\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n        eval_set_map = {\n            'v1.0-mini': 'mini_val',\n            'v1.0-trainval': 'val',\n        }\n        from .evals.nuscenes_eval_motion import MotionEval\n        if self.nusc is None:\n            self.nusc = NuScenes(version=self.version, dataroot=self.data_root, verbose=False)\n        self.nusc_eval_motion = MotionEval(\n            self.nusc,\n            config=self.eval_detection_configs,\n            result_path=result_path,\n            eval_set=eval_set_map[self.version],\n            output_dir=output_dir,\n            verbose=True,\n            data_infos=self.data_infos,\n            ann_file=self.ann_file,\n            category_convert_type='motion_category'\n        )\n        print('-'*50)\n        print(\n            'Evaluate on motion category, merge class for vehicles and pedestrians...')\n        print('evaluate standard motion metrics...')\n        self.nusc_eval_motion.main(\n            plot_examples=0,\n            render_curves=False,\n            eval_mode='standard')\n        print('evaluate motion mAP-minFDE metrics...')\n        self.nusc_eval_motion.main(\n            plot_examples=0,\n            render_curves=False,\n            eval_mode='motion_map')\n        print('evaluate EPA motion metrics...')\n        self.nusc_eval_motion.main(\n            plot_examples=0,\n            render_curves=False,\n            eval_mode='epa')\n        print('-'*50)\n        print('Evaluate on detection category...')\n        self.nusc_eval_motion = MotionEval(\n            self.nusc,\n            config=self.eval_detection_configs,\n            result_path=result_path,\n            eval_set=eval_set_map[self.version],\n            output_dir=output_dir,\n            verbose=True,\n            data_infos=self.data_infos,\n            category_convert_type='detection_category'\n        )\n        print('evaluate standard motion metrics...')\n        self.nusc_eval_motion.main(\n            plot_examples=0,\n            render_curves=False,\n            eval_mode='standard')\n        print('evaluate EPA motion metrics...')\n        self.nusc_eval_motion.main(\n            plot_examples=0,\n            render_curves=False,\n            eval_mode='motion_map')\n        print('evaluate EPA motion metrics...')\n        self.nusc_eval_motion.main(\n            plot_examples=0,\n            render_curves=False,\n            eval_mode='epa')\n        return {}\n            \n    def _evaluate_tracking_single(self,\n                                  result_path,\n                                  logger=None,\n                                  metric='bbox',\n                                  result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in nuScenes protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            metric (str): Metric name used for evaluation. 
Default: 'bbox'.\n            result_name (str): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n        if result_path is None: return {}\n        from nuscenes import NuScenes\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n\n        eval_set_map = {\n            'v1.0-mini': 'mini_val',\n            'v1.0-trainval': 'val',\n        }\n        from nuscenes.eval.tracking.evaluate import TrackingEval\n        from nuscenes.eval.common.config import config_factory as track_configs\n\n        cfg = track_configs(\"tracking_nips_2019\")\n        nusc_eval = TrackingEval(\n            config=cfg,\n            result_path=result_path,\n            eval_set=eval_set_map[self.version],\n            output_dir=output_dir,\n            verbose=True,\n            nusc_version=self.version,\n            nusc_dataroot=self.data_root\n        )\n        metrics = nusc_eval.main()\n        # record metrics\n        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))\n        print(metrics)\n        detail = dict()\n        metric_prefix = f'{result_name}_NuScenes'\n        keys = ['amota', 'amotp', 'recall', 'motar',\n                'gt', 'mota', 'motp', 'mt', 'ml', 'faf',\n                'tp', 'fp', 'fn', 'ids', 'frag', 'tid', 'lgd']\n        for key in keys:\n            detail['{}/{}'.format(metric_prefix, key)] = metrics[key]\n        return detail       \n\n    def evaluate_occupancy(self, occ_results, runner=None, show_dir=None, save=False, **eval_kwargs):\n        from .occ_metrics import Metric_mIoU, Metric_FScore\n        if show_dir is not None:\n            # import os\n            # if not os.path.exists(show_dir):\n\n            mmcv.mkdir_or_exist(show_dir)\n            mmcv.mkdir_or_exist(os.path.join(show_dir, 'occupancy_pred'))\n            print('\\nSaving output and gt in {} for visualization.'.format(show_dir))\n            begin= 0 # eval_kwargs.get('begin',None)\n\n            end=1 if not save else len(occ_results) # eval_kwargs.get('end',None)\n        self.occ_eval_metrics = Metric_mIoU(\n            num_classes=18,\n            use_lidar_mask=False,\n            use_image_mask=True)\n        \n        self.eval_fscore = False\n        if  self.eval_fscore:\n            self.fscore_eval_metrics = Metric_FScore(\n                leaf_size=10,\n                threshold_acc=0.4,\n                threshold_complete=0.4,\n                voxel_size=[0.4, 0.4, 0.4],\n                range=[-40, -40, -1, 40, 40, 5.4],\n                void=[17, 255],\n                use_lidar_mask=False,\n                use_image_mask=True,\n            )\n        count = 0\n        print('\\nStarting Evaluation...')\n        processed_set = set()\n        for occ_pred_w_index in tqdm(occ_results):\n            index = occ_pred_w_index['index']\n            if index in processed_set: continue\n            processed_set.add(index)\n\n            occ_pred = occ_pred_w_index['pred_occupancy']\n            info = self.data_infos[index]\n            scene_name = info['scene_name']\n            sample_token = info['token']\n            occupancy_file_path = osp.join(self.occupancy_path, scene_name, sample_token, 'labels.npz')\n            occ_gt = np.load(occupancy_file_path)\n \n            gt_semantics = occ_gt['semantics']\n            mask_lidar = occ_gt['mask_lidar'].astype(bool)\n            mask_camera = occ_gt['mask_camera'].astype(bool)            \n     
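            # Editor's note: only camera-visible voxels are scored here; `occ_pred` is\n            # pre-masked with `mask_camera`, and `Metric_mIoU` was constructed with\n            # use_image_mask=True, so add_batch presumably restricts the ground truth to\n            # the same camera-visible region (Occ3D-style occupancy evaluation).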
       # if show_dir is not None:\n            #     if begin is not None and end is not None:\n            #         if index>= begin and index<end:\n            #             sample_token = info['token']\n            #             count += 1\n            #             save_path = os.path.join(show_dir, 'occupancy_pred', scene_name+'_'+sample_token)\n            #             np.savez_compressed(save_path, pred=occ_pred[mask_camera], gt=occ_gt, sample_token=sample_token)\n            #             with open(os.path.join(show_dir, 'occupancy_pred', 'file.txt'),'a') as f:\n            #                 f.write(save_path+'\\n')\n                        # np.savez_compressed(save_path+'_gt', pred= occ_gt['semantics'], gt=occ_gt, sample_token=sample_token)\n                # else:\n                #     sample_token=info['token']\n                #     save_path=os.path.join(show_dir,str(index).zfill(4))\n                #     np.savez_compressed(save_path,pred=occ_pred,gt=occ_gt,sample_token=sample_token)\n\n\n            self.occ_eval_metrics.add_batch(occ_pred[mask_camera], gt_semantics, mask_lidar, mask_camera)\n            if self.eval_fscore:\n                self.fscore_eval_metrics.add_batch(occ_pred[mask_camera], gt_semantics, mask_lidar, mask_camera)\n   \n        res = self.occ_eval_metrics.count_miou()\n        if self.eval_fscore:\n            res.update(self.fscore_eval_metrics.count_fscore())\n        \n\n        return res \n        \n    def evaluate_mask(self, results):\n        results_dict = {}\n        iou = 0\n        # ret_f1=[0,0,0,0,0]\n        for i in range(len(results)):\n            iou+=results[i]['iou']\n        n=len(results)\n        iou = iou/n\n        results_dict['iou'] = iou\n        return results_dict\n\n    def evaluate_map(self,\n                 results,\n                 map_metric='chamfer',\n                 logger=None,\n                 jsonfile_prefix=None,\n                 result_names=['pred_map'],\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in nuScenes protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str]): Metrics to be evaluated.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n\n        result_files, tmp_dir = self.format_map_results(results, jsonfile_prefix)\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:\n                print('Evaluating map of {}'.format(name))\n                ret_dict = self._evaluate_map_single(result_files[name], map_metric=map_metric)\n            results_dict.update(ret_dict)\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_map_single(result_files, map_metric=map_metric)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show:\n            self.show(results, out_dir, pipeline=pipeline)\n        return results_dict\n\n\n    def world2bev_vis(self, x, y):\n            return int((x + 51.2) * 15), int((y + 51.2) * 15)\n\n    def __map_visual__(self, gt_map, pred_map, index=0):\n        \n        import cv2\n        for t, map_ in enumerate([gt_map, pred_map]):\n            bev_img = np.ones([2000, 2000, 3], dtype=np.float32) * 255\n            bev_img = bev_img.astype(np.float32)\n\n            bev_img = cv2.circle(bev_img, self.world2bev_vis(0, 0), 5, (0, 255, 0), thickness=-1) \n            # from IPython import embed\n            # embed()\n            # exit()\n\n            for k, line in enumerate(map_):\n                label = line['type']\n                score = line.get('confidence_level', 1)\n                pts = line['pts']\n                if score<0.3: continue\n                corners = np.array([self.world2bev_vis(*pt) for pt in pts])\n                corners = [each for each in corners if ((each>=0).all() & (each<2000).all())]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 255))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=colors[label], thickness=1)\n            mmcv.imwrite(bev_img, f'map_{index}_{t}.png')\n        print('saved') \n\n    def _evaluate_map_single(self,\n                         result_path,\n                         logger=None,\n                         metric='bbox',\n                         map_metric='chamfer',\n                         result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in nuScenes protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            metric (str): Metric name used for evaluation. 
Default: 'bbox'.\n            result_name (str): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n        detail = dict()\n\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n\n        from .map_utils.mean_ap import eval_map\n        from .map_utils.mean_ap import format_res_gt_by_classes\n        result_path = osp.abspath(result_path)\n        \n        print('Formating results & gts by classes')\n        pred_results = mmcv.load(result_path)\n        map_results = pred_results['map_results']\n        gt_anns = mmcv.load(self.map_ann_file)\n        map_annotations = gt_anns['GTs']\n        cls_gens, cls_gts = format_res_gt_by_classes(result_path,\n                                                     map_results,\n                                                     map_annotations,\n                                                     cls_names=self.MAPCLASSES,\n                                                     num_pred_pts_per_instance=20,\n                                                     eval_use_same_gt_sample_num_flag=True,\n                                                     pc_range=self.pc_range)\n        # for i in range(10):\n        #     self.__map_visual__(map_annotations[i]['vectors'], map_results[map_annotations[i]['sample_token']]['vectors'], index=i)\n        map_metrics = map_metric if isinstance(map_metric, list) else [map_metric]\n        allowed_metrics = ['chamfer', 'iou']\n        for metric in map_metrics:\n            if metric not in allowed_metrics:\n                raise KeyError(f'metric {metric} is not supported')\n        for metric in map_metrics:\n            print('-*'*10+f'use metric:{metric}'+'-*'*10)\n            if metric == 'chamfer':\n                thresholds = [0.5,1.0,1.5]\n            elif metric == 'iou':\n                thresholds= np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)\n            cls_aps = np.zeros((len(thresholds),self.NUM_MAPCLASSES))\n            for i, thr in enumerate(thresholds):\n                print('-*'*10+f'threshhold:{thr}'+'-*'*10)\n                mAP, cls_ap = eval_map(\n                                map_results,\n                                map_annotations,\n                                cls_gens,\n                                cls_gts,\n                                threshold=thr,\n                                cls_names=self.MAPCLASSES,\n                                logger=logger,\n                                num_pred_pts_per_instance=20,\n                                pc_range=self.pc_range,\n                                metric=metric)\n                for j in range(self.NUM_MAPCLASSES):\n                    cls_aps[i, j] = cls_ap[j]['ap']\n            for i, name in enumerate(self.MAPCLASSES):\n                print('{}: {}'.format(name, cls_aps.mean(0)[i]))\n                detail['NuscMap_{}/{}_AP'.format(metric,name)] =  cls_aps.mean(0)[i]\n            print('map: {}'.format(cls_aps.mean(0).mean()))\n            detail['NuscMap_{}/mAP'.format(metric)] = cls_aps.mean(0).mean()\n            for i, name in enumerate(self.MAPCLASSES):\n                for j, thr in enumerate(thresholds):\n                    if metric == 'chamfer':\n                        detail['NuscMap_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]\n                    elif metric == 'iou':\n                        if thr == 0.5 or thr == 0.75:\n      
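                      # for IoU, only the standard 0.5 / 0.75 thresholds are logged per class\n      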
                      detail['NuscMap_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]\n\n        return detail\n    \n\n    def evaluate_bbox(self,\n                 results,\n                 metric='bbox',\n                 logger=None,\n                 jsonfile_prefix='test',\n                 result_names=['pts_bbox'],\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in nuScenes protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Default: 'bbox'.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str, optional): The prefix of json files including\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)\n\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:\n                print('Evaluating bboxes of {}'.format(name))\n                ret_dict = self._evaluate_single(result_files[name])\n            results_dict.update(ret_dict)\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_single(result_files)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show or out_dir:\n            self.show(results, out_dir, show=show, pipeline=pipeline)\n\n        return results_dict\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='LoadPointsFromMultiSweeps',\n                sweeps_num=10,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ]\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=False, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Whether to visualize the results online.\n                Default: False.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n       
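     # keep predictions with score > 0.1 and convert points / boxes from LiDAR\n            # to depth coordinates before handing them to show_result()\n       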
     if 'pts_bbox' in result.keys():\n                result = result['pts_bbox']\n            data_info = self.data_infos[i]\n            pts_path = data_info['lidar_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points = self._extract_data(i, pipeline, 'points').numpy()\n            # for now we convert points into depth mode\n            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                               Coord3DMode.DEPTH)\n            inds = result['scores_3d'] > 0.1\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()\n            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,\n                                               Box3DMode.DEPTH)\n            pred_bboxes = result['boxes_3d'][inds].tensor.numpy()\n            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,\n                                                 Box3DMode.DEPTH)\n            show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,\n                        file_name, show)\n\n\ndef output_to_nusc_box(detection, with_velocity=True):\n    \"\"\"Convert the output to the box class in the nuScenes.\n\n    Args:\n        detection (dict): Detection results.\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.\n            - scores_3d (torch.Tensor): Detection scores.\n            - labels_3d (torch.Tensor): Predicted box labels.\n\n    Returns:\n        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.\n    \"\"\"\n    box3d = detection['boxes_3d']\n    scores = detection['scores_3d'].numpy()\n    labels = detection['labels_3d'].numpy()\n\n    box_gravity_center = box3d.gravity_center.numpy()\n    box_dims = box3d.dims.numpy()\n    box_yaw = box3d.yaw.numpy()\n\n    # our LiDAR coordinate system -> nuScenes box coordinate system\n    nus_box_dims = box_dims[:, [1, 0, 2]]\n\n    box_list = []\n    for i in range(len(box3d)):\n        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])\n        if with_velocity:\n            velocity = (*box3d.tensor[i, 7:9], 0.0)\n        else:\n            velocity = (0, 0, 0)\n        # velo_val = np.linalg.norm(box3d[i, 7:9])\n        # velo_ori = box3d[i, 6]\n        # velocity = (\n        # velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)\n        box = NuScenesBox(\n            box_gravity_center[i],\n            nus_box_dims[i],\n            quat,\n            label=labels[i],\n            score=scores[i],\n            velocity=velocity)\n        box_list.append(box)\n    return box_list\n\n@DATASETS.register_module()\nclass NuscenesOccupancy(NuScenesDataset):\n\n    CLASSES = [\n        \"empty\",\n        \"barrier\",\n        \"bicycle\",\n        \"bus\",\n        \"car\",\n        \"construction\",\n        \"motorcycle\",\n        \"pedestrian\",\n        \"trafficcone\",\n        \"trailer\",\n        \"truck\",\n        \"driveable_surface\",\n        \"other\",\n        \"sidewalk\",\n        \"terrain\",\n        \"mannade\",\n        \"vegetation\",\n    ]\n\n    def __init__(self, occupancy_info='data/nuscenes/occupancy_category.json', **kwargs):\n\n        super().__init__(**kwargs)\n        self.CLASSES = [\n            \"empty\",\n            \"barrier\",\n            \"bicycle\",\n            \"bus\",\n            \"car\",\n            \"construction\",\n            \"motorcycle\",\n            \"pedestrian\",\n            \"trafficcone\",\n            \"trailer\",\n    
        \"truck\",\n            \"driveable_surface\",\n            \"other\",\n            \"sidewalk\",\n            \"terrain\",\n            \"mannade\",\n            \"vegetation\",\n        ]\n\n        self.occupancy_info = mmcv.load(occupancy_info)\n\n    def get_cat_ids(self, idx):\n        \"\"\"Get category distribution of single scene.\n\n        Args:\n            idx (int): Index of the data_info.\n\n        Returns:\n            dict[list]: for each category, if the current scene\n                contains such boxes, store a list containing idx,\n                otherwise, store empty list.\n        \"\"\"\n        info = self.data_infos[idx]\n\n        token = info['token']\n        category = self.occupancy_info[token]\n        cat_ids = []\n        for k, v in category.items():\n            k = int(k)\n            if k == 17: continue\n            logv = max((np.log(v)/np.log(100)).round(),1)\n            cat_ids.extend([k] * int(logv))\n        return cat_ids\n\ndef lidar_nusc_box_to_global(info,\n                             boxes,\n                             classes,\n                             eval_configs,\n                             eval_version='detection_cvpr_2019'):\n    \"\"\"Convert the box from ego to global coordinate.\n\n    Args:\n        info (dict): Info for a specific sample data, including the\n            calibration information.\n        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.\n        classes (list[str]): Mapped classes in the evaluation.\n        eval_configs (object): Evaluation configuration object.\n        eval_version (str, optional): Evaluation version.\n            Default: 'detection_cvpr_2019'\n\n    Returns:\n        list: List of standard NuScenesBoxes in the global\n            coordinate.\n    \"\"\"\n    box_list = []\n    for box in boxes:\n        # Move box to ego vehicle coord system\n        box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))\n        box.translate(np.array(info['lidar2ego_translation']))\n        # filter det in ego.\n        cls_range_map = eval_configs.class_range\n        radius = np.linalg.norm(box.center[:2], 2)\n        det_range = cls_range_map[classes[box.label]]\n        if radius > det_range:\n            continue\n        # Move box to global coord system\n        box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))\n        box.translate(np.array(info['ego2global_translation']))\n        box_list.append(box)\n    return box_list\n\n\n\ndef invert_matrix_egopose_numpy(egopose):\n    \"\"\" Compute the inverse transformation of a 4x4 egopose numpy matrix.\"\"\"\n    inverse_matrix = np.zeros((4, 4), dtype=np.float32)\n    rotation = egopose[:3, :3]\n    translation = egopose[:3, 3]\n    inverse_matrix[:3, :3] = rotation.T\n    inverse_matrix[:3, 3] = -np.dot(rotation.T, translation)\n    inverse_matrix[3, 3] = 1.0\n    return inverse_matrix\n\ndef convert_egopose_to_matrix_numpy(rotation, translation):\n    transformation_matrix = np.zeros((4, 4), dtype=np.float32)\n    transformation_matrix[:3, :3] = rotation\n    transformation_matrix[:3, 3] = translation\n    transformation_matrix[3, 3] = 1.0\n    return transformation_matrix\n\n\ndef output_to_vecs(detection):\n    # box3d = detection['map_boxes_3d'].numpy()\n    scores = detection['map_scores_3d'].numpy()\n    labels = detection['map_labels_3d'].numpy()\n    pts = detection['map_pts_3d'].numpy()\n\n    vec_list = []\n    # import pdb;pdb.set_trace()\n    for i in range(pts.shape[0]):\n        
 vec = dict(\n            bbox=[],  # would hold box3d[i] (xyxy); intentionally left empty\n            label=labels[i],\n            score=scores[i],\n            pts=pts[i],\n        )\n        vec_list.append(vec)\n    return vec_list"
  },
  {
    "path": "mmdet3d/datasets/nuscenes_eval.py",
    "content": "import argparse\nimport copy\nimport json\nimport os\nimport time\nfrom typing import Tuple, Dict, Any\nimport torch\nimport numpy as np\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.config import config_factory\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.detection.data_classes import DetectionConfig\nfrom nuscenes.eval.detection.evaluate import NuScenesEval\nfrom pyquaternion import Quaternion\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.detection.data_classes import DetectionBox\nfrom nuscenes.eval.detection.utils import category_to_detection_name\nfrom nuscenes.eval.tracking.data_classes import TrackingBox\nfrom nuscenes.utils.data_classes import Box\nfrom nuscenes.utils.geometry_utils import points_in_box\nfrom nuscenes.utils.splits import create_splits_scenes\nfrom nuscenes.eval.common.loaders import load_prediction, add_center_dist, filter_eval_boxes\nimport tqdm\nfrom nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix\nfrom torchvision.transforms.functional import rotate\nimport pycocotools.mask as mask_util\n# from projects.mmdet3d_plugin.models.utils.visual import save_tensor\nfrom torchvision.transforms.functional import rotate\nimport cv2\nimport argparse\nimport json\nimport os\nimport random\nimport time\nfrom typing import Tuple, Dict, Any\nimport math\nimport numpy as np\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.config import config_factory\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.common.loaders import load_prediction, load_gt, add_center_dist, filter_eval_boxes\nfrom nuscenes.eval.detection.algo import accumulate, calc_ap, calc_tp\nfrom nuscenes.eval.detection.constants import TP_METRICS\nfrom nuscenes.eval.detection.data_classes import DetectionConfig, DetectionMetrics, DetectionBox, \\\n    DetectionMetricDataList\nfrom nuscenes.eval.detection.render import summary_plot, class_pr_curve, dist_pr_curve, visualize_sample\nfrom nuscenes.eval.common.utils import quaternion_yaw, Quaternion\nfrom mmdet3d.core.bbox.iou_calculators import BboxOverlaps3D\nfrom IPython import embed\nimport json\nfrom typing import Any\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom nuscenes import NuScenes\nfrom nuscenes.eval.common.data_classes import EvalBoxes\nfrom nuscenes.eval.common.render import setup_axis\nfrom nuscenes.eval.common.utils import boxes_to_sensor\nfrom nuscenes.eval.detection.constants import TP_METRICS, DETECTION_NAMES, DETECTION_COLORS, TP_METRICS_UNITS, \\\n    PRETTY_DETECTION_NAMES, PRETTY_TP_METRICS\nfrom nuscenes.eval.detection.data_classes import DetectionMetrics, DetectionMetricData, DetectionMetricDataList\nfrom nuscenes.utils.data_classes import LidarPointCloud\nfrom nuscenes.utils.geometry_utils import view_points\n\n\n\nAxis = Any\n\ndef class_tp_curve(md_list: DetectionMetricDataList,\n                   metrics: DetectionMetrics,\n                   detection_name: str,\n                   min_recall: float,\n                   dist_th_tp: float,\n                   savepath: str = None,\n                   ax: Axis = None) -> None:\n    \"\"\"\n    Plot the true positive curve for the specified class.\n    :param md_list: DetectionMetricDataList instance.\n    :param metrics: DetectionMetrics instance.\n    :param detection_name:\n    :param min_recall: Minimum recall value.\n    :param dist_th_tp: The distance 
threshold used to determine matches.\n    :param savepath: If given, saves the the rendering here instead of displaying.\n    :param ax: Axes onto which to render.\n    \"\"\"\n    # Get metric data for given detection class with tp distance threshold.\n\n    md = md_list[(detection_name, dist_th_tp)]\n    min_recall_ind = round(100 * min_recall)\n    if min_recall_ind <= md.max_recall_ind:\n        # For traffic_cone and barrier only a subset of the metrics are plotted.\n        rel_metrics = [m for m in TP_METRICS if not np.isnan(metrics.get_label_tp(detection_name, m))]\n        ylimit = max([max(getattr(md, metric)[min_recall_ind:md.max_recall_ind + 1]) for metric in rel_metrics]) * 1.1\n    else:\n        ylimit = 1.0\n\n    # Prepare axis.\n    if ax is None:\n        ax = setup_axis(title=PRETTY_DETECTION_NAMES[detection_name], xlabel='Recall', ylabel='Error', xlim=1,\n                        min_recall=min_recall)\n    ax.set_ylim(0, ylimit)\n\n    # Plot the recall vs. error curve for each tp metric.\n    for metric in TP_METRICS:\n        tp = metrics.get_label_tp(detection_name, metric)\n\n        # Plot only if we have valid data.\n        if tp is not np.nan and min_recall_ind <= md.max_recall_ind:\n            recall, error = md.recall[:md.max_recall_ind + 1], getattr(md, metric)[:md.max_recall_ind + 1]\n        else:\n            recall, error = [], []\n\n        # Change legend based on tp value\n        if tp is np.nan:\n            label = '{}: n/a'.format(PRETTY_TP_METRICS[metric])\n        elif min_recall_ind > md.max_recall_ind:\n            label = '{}: nan'.format(PRETTY_TP_METRICS[metric])\n        else:\n            label = '{}: {:.2f} ({})'.format(PRETTY_TP_METRICS[metric], tp, TP_METRICS_UNITS[metric])\n        if metric == 'trans_err':\n            label += f' ({md.max_recall_ind})'  # add recall\n            print(f'Recall: {detection_name}: {md.max_recall_ind/100}')\n        ax.plot(recall, error, label=label)\n    ax.axvline(x=md.max_recall, linestyle='-.', color=(0, 0, 0, 0.3))\n    ax.legend(loc='best')\n\n    if savepath is not None:\n        plt.savefig(savepath)\n        plt.close()\n\n\nclass DetectionBox_modified(DetectionBox):\n    def __init__(self, *args, token=None, visibility=None, index=None, **kwargs):\n        '''\n        add annotation token\n        '''\n        super().__init__(*args, **kwargs)\n        self.token = token\n        self.visibility = visibility\n        self.index = index\n\n    def serialize(self) -> dict:\n        \"\"\" Serialize instance into json-friendly format. \"\"\"\n        return {\n            'token': self.token,\n            'sample_token': self.sample_token,\n            'translation': self.translation,\n            'size': self.size,\n            'rotation': self.rotation,\n            'velocity': self.velocity,\n            'ego_translation': self.ego_translation,\n            'num_pts': self.num_pts,\n            'detection_name': self.detection_name,\n            'detection_score': self.detection_score,\n            'attribute_name': self.attribute_name,\n            'visibility': self.visibility,\n            'index': self.index\n\n        }\n\n    @classmethod\n    def deserialize(cls, content: dict):\n        \"\"\" Initialize from serialized content. 
\"\"\"\n        return cls(\n            token=content['token'],\n            sample_token=content['sample_token'],\n            translation=tuple(content['translation']),\n            size=tuple(content['size']),\n            rotation=tuple(content['rotation']),\n            velocity=tuple(content['velocity']),\n            ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content\n            else tuple(content['ego_translation']),\n            num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),\n            detection_name=content['detection_name'],\n            detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),\n            attribute_name=content['attribute_name'],\n            visibility=content['visibility'],\n            index=content['index'],\n        )\n\n\ndef center_in_image(box, intrinsic: np.ndarray, imsize: Tuple[int, int], vis_level: int = BoxVisibility.ANY) -> bool:\n    \"\"\"\n    Check if a box is visible inside an image without accounting for occlusions.\n    :param box: The box to be checked.\n    :param intrinsic: <float: 3, 3>. Intrinsic camera matrix.\n    :param imsize: (width, height).\n    :param vis_level: One of the enumerations of <BoxVisibility>.\n    :return True if visibility condition is satisfied.\n    \"\"\"\n\n    center_3d = box.center.reshape(3, 1)\n    center_img = view_points(center_3d, intrinsic, normalize=True)[:2, :]\n\n    visible = np.logical_and(center_img[0, :] > 0, center_img[0, :] < imsize[0])\n    visible = np.logical_and(visible, center_img[1, :] < imsize[1])\n    visible = np.logical_and(visible, center_img[1, :] > 0)\n    visible = np.logical_and(visible, center_3d[2, :] > 1)\n\n    in_front = center_3d[2, :] > 0.1  # True if a corner is at least 0.1 meter in front of the camera.\n\n    if vis_level == BoxVisibility.ALL:\n        return all(visible) and all(in_front)\n    elif vis_level == BoxVisibility.ANY:\n        return any(visible) and all(in_front)\n    elif vis_level == BoxVisibility.NONE:\n        return True\n    else:\n        raise ValueError(\"vis_level: {} not valid\".format(vis_level))\n\n\ndef exist_corners_in_image_but_not_all(box, intrinsic: np.ndarray, imsize: Tuple[int, int],\n                                       vis_level: int = BoxVisibility.ANY) -> bool:\n    \"\"\"\n    Check if a box is visible in images but not all corners in image .\n    :param box: The box to be checked.\n    :param intrinsic: <float: 3, 3>. 
Intrinsic camera matrix.\n    :param imsize: (width, height).\n    :param vis_level: One of the enumerations of <BoxVisibility>.\n    :return True if visibility condition is satisfied.\n    \"\"\"\n\n    corners_3d = box.corners()\n    corners_img = view_points(corners_3d, intrinsic, normalize=True)[:2, :]\n\n    visible = np.logical_and(corners_img[0, :] > 0, corners_img[0, :] < imsize[0])\n    visible = np.logical_and(visible, corners_img[1, :] < imsize[1])\n    visible = np.logical_and(visible, corners_img[1, :] > 0)\n    visible = np.logical_and(visible, corners_3d[2, :] > 1)\n\n    in_front = corners_3d[2, :] > 0.1  # True if a corner is at least 0.1 meter in front of the camera.\n\n    if any(visible) and not all(visible) and all(in_front):\n        return True\n    else:\n        return False\n\n\ndef load_gt(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False):\n    \"\"\"\n    Loads ground truth boxes from DB.\n    :param nusc: A NuScenes instance.\n    :param eval_split: The evaluation split for which we load GT boxes.\n    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.\n    :param verbose: Whether to print messages to stdout.\n    :return: The GT boxes.\n    \"\"\"\n\n    # Init.\n    if box_cls == DetectionBox_modified:\n        attribute_map = {a['token']: a['name'] for a in nusc.attribute}\n\n    if verbose:\n        print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))\n    # Read out all sample_tokens in DB.\n    sample_tokens_all = [s['token'] for s in nusc.sample]\n    assert len(sample_tokens_all) > 0, \"Error: Database has no samples!\"\n\n    # Only keep samples from this split.\n    splits = create_splits_scenes()\n\n    # Check compatibility of split with nusc_version.\n    version = nusc.version\n    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:\n        assert version.endswith('trainval'), \\\n            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)\n    elif eval_split in {'mini_train', 'mini_val'}:\n        assert version.endswith('mini'), \\\n            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)\n    elif eval_split == 'test':\n        assert version.endswith('test'), \\\n            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)\n    else:\n        raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'\n                         .format(eval_split))\n\n    if eval_split == 'test':\n        # Check that you aren't trying to cheat :).\n        assert len(nusc.sample_annotation) > 0, \\\n            'Error: You are trying to evaluate on the test set but you do not have the annotations!'\n    index_map = {}\n    for scene in nusc.scene:\n        first_sample_token = scene['first_sample_token']\n        sample = nusc.get('sample', first_sample_token)\n        index_map[first_sample_token] = 1\n        index = 2\n        while sample['next'] != '':\n            sample = nusc.get('sample', sample['next'])\n            index_map[sample['token']] = index\n            index += 1\n\n    sample_tokens = []\n    for sample_token in sample_tokens_all:\n        scene_token = nusc.get('sample', sample_token)['scene_token']\n        scene_record = nusc.get('scene', scene_token)\n        if scene_record['name'] in splits[eval_split]:\n            
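# keep only samples whose scene belongs to the requested split; each GT box\n            # built below also records its per-scene frame index via index_map\n            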
sample_tokens.append(sample_token)\n\n    all_annotations = EvalBoxes()\n\n    # Load annotations and filter predictions and annotations.\n    tracking_id_set = set()\n    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):\n\n        sample = nusc.get('sample', sample_token)\n        sample_annotation_tokens = sample['anns']\n\n        sample_boxes = []\n        for sample_annotation_token in sample_annotation_tokens:\n\n            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)\n            if box_cls == DetectionBox_modified:\n                # Get label name in detection task and filter unused labels.\n                detection_name = category_to_detection_name(sample_annotation['category_name'])\n                if detection_name is None:\n                    continue\n\n                # Get attribute_name.\n                attr_tokens = sample_annotation['attribute_tokens']\n                attr_count = len(attr_tokens)\n                if attr_count == 0:\n                    attribute_name = ''\n                elif attr_count == 1:\n                    attribute_name = attribute_map[attr_tokens[0]]\n                else:\n                    raise Exception('Error: GT annotations must not have more than one attribute!')\n\n                sample_boxes.append(\n                    box_cls(\n                        token=sample_annotation_token,\n                        sample_token=sample_token,\n                        translation=sample_annotation['translation'],\n                        size=sample_annotation['size'],\n                        rotation=sample_annotation['rotation'],\n                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],\n                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],\n                        detection_name=detection_name,\n                        detection_score=-1.0,  # GT samples do not have a score.\n                        attribute_name=attribute_name,\n                        visibility=sample_annotation['visibility_token'],\n                        index=index_map[sample_token]\n                    )\n                )\n            elif box_cls == TrackingBox:\n                assert False\n            else:\n                raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)\n\n        all_annotations.add_boxes(sample_token, sample_boxes)\n\n    if verbose:\n        print(\"Loaded ground truth annotations for {} samples.\".format(len(all_annotations.sample_tokens)))\n\n    return all_annotations\n\n\ndef filter_eval_boxes_by_id(nusc: NuScenes,\n                            eval_boxes: EvalBoxes,\n                            id=None,\n                            verbose: bool = False) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. 
Distance, bike-racks and points per box.\n    :param nusc: An instance of the NuScenes class.\n    :param eval_boxes: An instance of the EvalBoxes class.\n    :param is: the anns token set that used to keep bboxes.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            if box.token in id:\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n        print(\"=> After anns based filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\n\ndef filter_eval_boxes_by_visibility(\n        ori_eval_boxes: EvalBoxes,\n        visibility=None,\n        verbose: bool = False) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. Distance, bike-racks and points per box.\n    :param nusc: An instance of the NuScenes class.\n    :param eval_boxes: An instance of the EvalBoxes class.\n    :param is: the anns token set that used to keep bboxes.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    eval_boxes = copy.deepcopy(ori_eval_boxes)\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            if box.visibility == visibility:\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n        print(\"=> After visibility based filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\n\ndef filter_by_sample_token(ori_eval_boxes, valid_sample_tokens=[],  verbose=False):\n    eval_boxes = copy.deepcopy(ori_eval_boxes)\n    for sample_token in eval_boxes.sample_tokens:\n        if sample_token not in valid_sample_tokens:\n            eval_boxes.boxes.pop(sample_token)\n    return eval_boxes\n\n\ndef filter_eval_boxes_by_overlap(nusc: NuScenes,\n                                 eval_boxes: EvalBoxes,\n                                 verbose: bool = False) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. 
basedon overlap .\n    :param nusc: An instance of the NuScenes class.\n    :param eval_boxes: An instance of the EvalBoxes class.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    cams = ['CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_BACK_RIGHT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_FRONT_LEFT']\n\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        sample_record = nusc.get('sample', sample_token)\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            count = 0\n            for cam in cams:\n                '''\n                copy-paste form nuscens\n                '''\n                sample_data_token = sample_record['data'][cam]\n                sd_record = nusc.get('sample_data', sample_data_token)\n                cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])\n                sensor_record = nusc.get('sensor', cs_record['sensor_token'])\n                pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])\n                cam_intrinsic = np.array(cs_record['camera_intrinsic'])\n                imsize = (sd_record['width'], sd_record['height'])\n                new_box = Box(box.translation, box.size, Quaternion(box.rotation),\n                              name=box.detection_name, token='')\n\n                # Move box to ego vehicle coord system.\n                new_box.translate(-np.array(pose_record['translation']))\n                new_box.rotate(Quaternion(pose_record['rotation']).inverse)\n\n                #  Move box to sensor coord system.\n                new_box.translate(-np.array(cs_record['translation']))\n                new_box.rotate(Quaternion(cs_record['rotation']).inverse)\n\n                if center_in_image(new_box, cam_intrinsic, imsize, vis_level=BoxVisibility.ANY):\n                    count += 1\n                # if exist_corners_in_image_but_not_all(new_box, cam_intrinsic, imsize, vis_level=BoxVisibility.ANY):\n                #    count += 1\n\n            if count > 1:\n                with open('center_overlap.txt', 'a') as f:\n                    try:\n                        f.write(box.token + '\\n')\n                    except:\n                        pass\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n    verbose = True\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n        print(\"=> After anns based filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\n\n\n\ndef filter_eval_boxes_by_range(nusc: NuScenes,\n                                 eval_boxes: EvalBoxes,\n                                 verbose: bool = True,\n                                 min_=0,\n                                 max_=60,\n                                 ) -> EvalBoxes:\n    \"\"\"\n    Applies filtering to boxes. 
basedon overlap .\n    :param nusc: An instance of the NuScenes class.\n    :param eval_boxes: An instance of the EvalBoxes class.\n    :param verbose: Whether to print to stdout.\n    \"\"\"\n\n    # Accumulators for number of filtered boxes.\n    cams = ['CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_BACK_RIGHT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_FRONT_LEFT']\n\n    total, anns_filter = 0, 0\n    for ind, sample_token in enumerate(eval_boxes.sample_tokens):\n\n        # Filter on anns\n        total += len(eval_boxes[sample_token])\n        sample_record = nusc.get('sample', sample_token)\n        filtered_boxes = []\n        for box in eval_boxes[sample_token]:\n            count = 0\n            sample_data_token = sample_record['data'][cams[0]]\n            sd_record = nusc.get('sample_data', sample_data_token)\n            cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])\n            sensor_record = nusc.get('sensor', cs_record['sensor_token'])\n            pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])\n            cam_intrinsic = np.array(cs_record['camera_intrinsic'])\n            imsize = (sd_record['width'], sd_record['height'])\n            new_box = Box(box.translation, box.size, Quaternion(box.rotation),\n                              name=box.detection_name, token='')\n                # Move box to ego vehicle coord system.\n            new_box.translate(-np.array(pose_record['translation']))\n            new_box.rotate(Quaternion(pose_record['rotation']).inverse)\n            x, y = new_box.center[:2]\n            dist = math.sqrt(x**2+y**2)\n            if dist<max_ and dist > min_:\n                filtered_boxes.append(box)\n        anns_filter += len(filtered_boxes)\n        eval_boxes.boxes[sample_token] = filtered_boxes\n\n\n    if verbose:\n        print(\"=> Original number of boxes: %d\" % total)\n\n\n\n\n        print(\"=> After range filtering: %d\" % anns_filter)\n\n    return eval_boxes\n\nclass NuScenesEval_custom(NuScenesEval):\n    \"\"\"\n    Dummy class for backward-compatibility. Same as DetectionEval.\n    \"\"\"\n\n    def __init__(self,\n                 nusc: NuScenes,\n                 config: DetectionConfig,\n                 result_path: str,\n                 eval_set: str,\n                 output_dir: str = None,\n                 verbose: bool = True,\n                 overlap_test=False,\n                 eval_mask=False,\n                 data_infos=None\n                 ):\n        \"\"\"\n        Initialize a DetectionEval object.\n        :param nusc: A NuScenes object.\n        :param config: A DetectionConfig object.\n        :param result_path: Path of the nuScenes JSON result file.\n        :param eval_set: The dataset split to evaluate on, e.g. 
train, val or test.\n        :param output_dir: Folder to save plots and results to.\n        :param verbose: Whether to print to stdout.\n        \"\"\"\n\n        self.nusc = nusc\n        self.result_path = result_path\n        self.eval_set = eval_set\n        self.output_dir = output_dir\n        self.verbose = verbose\n        self.cfg = config\n        self.overlap_test = overlap_test\n        self.eval_mask = eval_mask\n        self.data_infos = data_infos\n        # Check result file exists.\n        assert os.path.exists(result_path), 'Error: The result file does not exist!'\n\n        # Make dirs.\n        self.plot_dir = os.path.join(self.output_dir, 'plots')\n        if not os.path.isdir(self.output_dir):\n            os.makedirs(self.output_dir)\n        if not os.path.isdir(self.plot_dir):\n            os.makedirs(self.plot_dir)\n\n        # Load data.\n        if verbose:\n            print('Initializing nuScenes detection evaluation')\n        self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,\n                                                     verbose=verbose)\n        self.gt_boxes = load_gt(self.nusc, self.eval_set, DetectionBox_modified, verbose=verbose)\n\n        assert set(self.pred_boxes.sample_tokens) == set(self.gt_boxes.sample_tokens), \\\n            \"Samples in split doesn't match samples in predictions.\"\n\n        # Add center distances.\n        self.pred_boxes = add_center_dist(nusc, self.pred_boxes)\n        self.gt_boxes = add_center_dist(nusc, self.gt_boxes)\n\n        # Filter boxes (distance, points per box, etc.).\n\n        if verbose:\n            print('Filtering predictions')\n        self.pred_boxes = filter_eval_boxes(nusc, self.pred_boxes, self.cfg.class_range, verbose=verbose)\n        if verbose:\n            print('Filtering ground truth annotations')\n        self.gt_boxes = filter_eval_boxes(nusc, self.gt_boxes, self.cfg.class_range, verbose=verbose)\n\n        if self.overlap_test:\n            self.pred_boxes = filter_eval_boxes_by_overlap(self.nusc, self.pred_boxes)\n\n            self.gt_boxes = filter_eval_boxes_by_overlap(self.nusc, self.gt_boxes, verbose=True)\n\n        min_ = 25\n        max_ = 60\n        self.pred_boxes = filter_eval_boxes_by_range(self.nusc, self.pred_boxes, min_=min_-2, max_=max_+2)\n        self.gt_boxes = filter_eval_boxes_by_range(self.nusc, self.gt_boxes, min_=min_, max_=max_)\n        \n        self.all_gt = copy.deepcopy(self.gt_boxes)\n        self.all_preds = copy.deepcopy(self.pred_boxes)\n        self.sample_tokens = self.gt_boxes.sample_tokens\n\n        self.index_map = {}\n        for scene in nusc.scene:\n            first_sample_token = scene['first_sample_token']\n            sample = nusc.get('sample', first_sample_token)\n            self.index_map[first_sample_token] = 1\n            index = 2\n            while sample['next'] != '':\n                sample = nusc.get('sample', sample['next'])\n                self.index_map[sample['token']] = index\n                index += 1\n\n    def update_gt(self, type_='vis', visibility='1', index=1):\n        if type_ == 'vis':\n            self.visibility_test = True\n            if self.visibility_test:\n                '''[{'description': 'visibility of whole object is between 0 and 40%',\n                'token': '1',\n                'level': 'v0-40'},\n                {'description': 'visibility of whole object is between 40 and 60%',\n                'token': '2',\n                
'level': 'v40-60'},\n                {'description': 'visibility of whole object is between 60 and 80%',\n                'token': '3',\n                'level': 'v60-80'},\n                {'description': 'visibility of whole object is between 80 and 100%',\n                'token': '4',\n                'level': 'v80-100'}]'''\n\n                self.gt_boxes = filter_eval_boxes_by_visibility(self.all_gt, visibility, verbose=True)\n\n        elif type_ == 'ord':\n\n            valid_tokens = [key for (key, value) in self.index_map.items() if value == index]\n            # from IPython import embed\n            # embed()\n            self.gt_boxes = filter_by_sample_token(self.all_gt, valid_tokens)\n            self.pred_boxes = filter_by_sample_token(self.all_preds, valid_tokens)\n        self.sample_tokens = self.gt_boxes.sample_tokens\n\n\n    def evaluate(self) -> Tuple[DetectionMetrics, DetectionMetricDataList]:\n        \"\"\"\n        Performs the actual evaluation.\n        :return: A tuple of high-level and the raw metric data.\n        \"\"\"\n        start_time = time.time()\n\n        # -----------------------------------\n        # Step 1: Accumulate metric data for all classes and distance thresholds.\n        # -----------------------------------\n        if self.verbose:\n            print('Accumulating metric data...')\n        metric_data_list = DetectionMetricDataList()\n\n        # print(self.cfg.dist_fcn_callable, self.cfg.dist_ths)\n        # self.cfg.dist_ths = [0.3]\n        # self.cfg.dist_fcn_callable\n        for class_name in self.cfg.class_names:\n            for dist_th in self.cfg.dist_ths:\n                md = accumulate(self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn_callable, dist_th)\n                metric_data_list.set(class_name, dist_th, md)\n\n        # -----------------------------------\n        # Step 2: Calculate metrics from the data.\n        # -----------------------------------\n        if self.verbose:\n            print('Calculating metrics...')\n        metrics = DetectionMetrics(self.cfg)\n        for class_name in self.cfg.class_names:\n            # Compute APs.\n            for dist_th in self.cfg.dist_ths:\n                metric_data = metric_data_list[(class_name, dist_th)]\n                ap = calc_ap(metric_data, self.cfg.min_recall, self.cfg.min_precision)\n                metrics.add_label_ap(class_name, dist_th, ap)\n            # Compute TP metrics.\n            for metric_name in TP_METRICS:\n                metric_data = metric_data_list[(class_name, self.cfg.dist_th_tp)]\n                if class_name in ['traffic_cone'] and metric_name in ['attr_err', 'vel_err', 'orient_err']:\n                    tp = np.nan\n                elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:\n                    tp = np.nan\n                else:\n                    tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)\n                metrics.add_label_tp(class_name, metric_name, tp)\n\n        # Compute evaluation time.\n        metrics.add_runtime(time.time() - start_time)\n\n        return metrics, metric_data_list\n\n    def render(self, metrics: DetectionMetrics, md_list: DetectionMetricDataList) -> None:\n        \"\"\"\n        Renders various PR and TP curves.\n        :param metrics: DetectionMetrics instance.\n        :param md_list: DetectionMetricDataList instance.\n        \"\"\"\n        if self.verbose:\n            print('Rendering PR and TP curves')\n\n        def 
savepath(name):\n            return os.path.join(self.plot_dir, name + '.pdf')\n\n        summary_plot(md_list, metrics, min_precision=self.cfg.min_precision, min_recall=self.cfg.min_recall,\n                     dist_th_tp=self.cfg.dist_th_tp, savepath=savepath('summary'))\n\n        for detection_name in self.cfg.class_names:\n            class_pr_curve(md_list, metrics, detection_name, self.cfg.min_precision, self.cfg.min_recall,\n                           savepath=savepath(detection_name + '_pr'))\n\n            class_tp_curve(md_list, metrics, detection_name, self.cfg.min_recall, self.cfg.dist_th_tp,\n                           savepath=savepath(detection_name + '_tp'))\n\n        for dist_th in self.cfg.dist_ths:\n            dist_pr_curve(md_list, metrics, dist_th, self.cfg.min_precision, self.cfg.min_recall,\n                          savepath=savepath('dist_pr_' + str(dist_th)))\n\n    def evaluate_mask(self, preds, HDMap, Bbox_mask):\n\n        if preds is None:\n            return {}\n        self.HDMap = HDMap\n        self.Bbox_mask = Bbox_mask\n        tokens = [each['token'] for each in self.data_infos]\n        mask_shape_flag = f'{HDMap.canvas_size[0]}_{HDMap.canvas_size[1]}_{HDMap.grid_length}'\n\n        # print(mask_shape)\n        try:\n            masks_gt = np.load(f'.cache/mask_gt_{mask_shape_flag}.npy')\n        except:\n            map_masks_gt = self.prepare_map_mask_gt(tokens)\n            bbox_masks_gt = self.prepare_bbox_mask_gt(tokens)\n            masks_gt = np.concatenate([bbox_masks_gt, map_masks_gt], 1)\n            np.save(f'.cache/mask_gt_{mask_shape_flag}.npy', masks_gt)\n\n        gt_list = [[], [], [], [], [], []]\n        pred_list = [[], [], [], [], [], []]\n        for i, (token, pred, gt) in enumerate(zip(tokens, preds, masks_gt)):\n            preds_mask = []\n            for mask in pred:\n                preds_mask.append(mask_util.decode(mask))\n            preds_mask = np.stack(preds_mask)\n\n            gt = torch.tensor(gt)\n            gt = torch.flip(gt, [1, 2])\n\n            preds_mask = torch.tensor(preds_mask)\n            preds_mask = torch.flip(preds_mask, [1])\n\n            for j in range(6):\n                gt_list[j].append(gt[j].reshape(-1))\n                pred_list[j].append(preds_mask[j].reshape(-1))\n\n            # ti = time.time()\n            # save_tensor(preds_mask, f'masks/{token}preds.png')\n            # save_tensor(gt, f'masks/{token}gt.png')\n        # bbox_masks = self.repare_bbox_mask_gt()\n        class_names = [\n            'car', 'vehicle', 'ped', 'divider', 'boundary', 'drivable', 'lane'\n        ]\n        results = {}\n        for i, name in enumerate(class_names[:-1]):\n            results[name] = self.get_batch_iou(torch.stack(gt_list[i]), torch.stack(pred_list[i]))\n        # embed()\n        # exit()\n        results['lane'] = self.get_batch_iou(torch.stack(gt_list[-3]) | torch.stack(gt_list[-2]),\n                                             torch.stack(pred_list[-2]) | torch.stack(pred_list[-3]))\n\n        return results\n\nif __name__ == \"__main__\":\n\n    # Settings.\n    parser = argparse.ArgumentParser(description='Evaluate nuScenes detection results.',\n                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('result_path', type=str, help='The submission as a JSON file.')\n    parser.add_argument('--output_dir', type=str, default='~/nuscenes-metrics',\n                        help='Folder to store result metrics, graphs and 
example visualizations.')\n    parser.add_argument('--eval_set', type=str, default='val',\n                        help='Which dataset split to evaluate on, train, val or test.')\n    parser.add_argument('--dataroot', type=str, default='data/nuscenes',\n                        help='Default nuScenes data directory.')\n    parser.add_argument('--version', type=str, default='v1.0-trainval',\n                        help='Which version of the nuScenes dataset to evaluate on, e.g. v1.0-trainval.')\n    parser.add_argument('--config_path', type=str, default='',\n                        help='Path to the configuration file.'\n                             'If no path given, the CVPR 2019 configuration will be used.')\n    parser.add_argument('--plot_examples', type=int, default=0,\n                        help='How many example visualizations to write to disk.')\n    parser.add_argument('--render_curves', type=int, default=1,\n                        help='Whether to render PR and TP curves to disk.')\n    parser.add_argument('--verbose', type=int, default=1,\n                        help='Whether to print to stdout.')\n    args = parser.parse_args()\n\n    result_path_ = os.path.expanduser(args.result_path)\n    output_dir_ = os.path.expanduser(args.output_dir)\n    eval_set_ = args.eval_set\n    dataroot_ = args.dataroot\n    version_ = args.version\n    config_path = args.config_path\n    plot_examples_ = args.plot_examples\n    render_curves_ = bool(args.render_curves)\n    verbose_ = bool(args.verbose)\n\n    if config_path == '':\n        cfg_ = config_factory('detection_cvpr_2019')\n    else:\n        with open(config_path, 'r') as _f:\n            cfg_ = DetectionConfig.deserialize(json.load(_f))\n\n    nusc_ = NuScenes(version=version_, verbose=verbose_, dataroot=dataroot_)\n    nusc_eval = NuScenesEval_custom(nusc_, config=cfg_, result_path=result_path_, eval_set=eval_set_,\n                                    output_dir=output_dir_, verbose=verbose_)\n    for vis in ['1', '2', '3', '4']:\n        nusc_eval.update_gt(type_='vis', visibility=vis)\n        print(f'================ {vis} ===============')\n        nusc_eval.main(plot_examples=plot_examples_, render_curves=render_curves_)\n    #for index in range(1, 41):\n    #    nusc_eval.update_gt(type_='ord', index=index)\n    #\n"
  },
  {
    "path": "mmdet3d/datasets/nuscenes_mono_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport tempfile\nimport warnings\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport pyquaternion\nimport torch\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\n\nfrom mmdet3d.core import bbox3d2result, box3d_multiclass_nms, xywhr2xyxyr\nfrom mmdet.datasets import CocoDataset\nfrom ..core import show_multi_modality_result\nfrom ..core.bbox import CameraInstance3DBoxes, get_box_type\nfrom .builder import DATASETS\nfrom .pipelines import Compose\nfrom .utils import extract_result_dict, get_loading_pipeline\n\n\n@DATASETS.register_module()\nclass NuScenesMonoDataset(CocoDataset):\n    r\"\"\"Monocular 3D detection on NuScenes Dataset.\n\n    This class serves as the API for experiments on the NuScenes Dataset.\n\n    Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_\n    for data downloading.\n\n    Args:\n        ann_file (str): Path of annotation file.\n        data_root (str): Path of dataset root.\n        load_interval (int, optional): Interval of loading the dataset. It is\n            used to uniformly sample the dataset. Defaults to 1.\n        with_velocity (bool, optional): Whether include velocity prediction\n            into the experiments. Defaults to True.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'Camera' in this class. Available options includes.\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        eval_version (str, optional): Configuration version of evaluation.\n            Defaults to  'detection_cvpr_2019'.\n        use_valid_flag (bool, optional): Whether to use `use_valid_flag` key\n            in the info file as mask to filter gt_boxes and gt_names.\n            Defaults to False.\n        version (str, optional): Dataset version. 
Defaults to 'v1.0-trainval'.\n    \"\"\"\n    CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n               'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n               'barrier')\n    DefaultAttribute = {\n        'car': 'vehicle.parked',\n        'pedestrian': 'pedestrian.moving',\n        'trailer': 'vehicle.parked',\n        'truck': 'vehicle.parked',\n        'bus': 'vehicle.moving',\n        'motorcycle': 'cycle.without_rider',\n        'construction_vehicle': 'vehicle.parked',\n        'bicycle': 'cycle.without_rider',\n        'barrier': '',\n        'traffic_cone': '',\n    }\n    # https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa\n    ErrNameMapping = {\n        'trans_err': 'mATE',\n        'scale_err': 'mASE',\n        'orient_err': 'mAOE',\n        'vel_err': 'mAVE',\n        'attr_err': 'mAAE'\n    }\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline,\n                 load_interval=1,\n                 with_velocity=True,\n                 modality=None,\n                 box_type_3d='Camera',\n                 eval_version='detection_cvpr_2019',\n                 use_valid_flag=False,\n                 version='v1.0-trainval',\n                 classes=None,\n                 img_prefix='',\n                 seg_prefix=None,\n                 proposal_file=None,\n                 test_mode=False,\n                 filter_empty_gt=True,\n                 file_client_args=dict(backend='disk')):\n        self.ann_file = ann_file\n        self.data_root = data_root\n        self.img_prefix = img_prefix\n        self.seg_prefix = seg_prefix\n        self.proposal_file = proposal_file\n        self.test_mode = test_mode\n        self.filter_empty_gt = filter_empty_gt\n        self.CLASSES = self.get_classes(classes)\n        self.file_client = mmcv.FileClient(**file_client_args)\n\n        # load annotations (and proposals)\n        with self.file_client.get_local_path(self.ann_file) as local_path:\n            self.data_infos = self.load_annotations(local_path)\n\n        if self.proposal_file is not None:\n            with self.file_client.get_local_path(\n                    self.proposal_file) as local_path:\n                self.proposals = self.load_proposals(local_path)\n        else:\n            self.proposals = None\n\n        # filter images too small and containing no annotations\n        if not test_mode:\n            valid_inds = self._filter_imgs()\n            self.data_infos = [self.data_infos[i] for i in valid_inds]\n            if self.proposals is not None:\n                self.proposals = [self.proposals[i] for i in valid_inds]\n            # set group flag for the sampler\n            self._set_group_flag()\n\n        # processing pipeline\n        self.pipeline = Compose(pipeline)\n\n        self.load_interval = load_interval\n        self.with_velocity = with_velocity\n        self.modality = modality\n        self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)\n        self.eval_version = eval_version\n        self.use_valid_flag = use_valid_flag\n        self.bbox_code_size = 9\n        self.version = version\n        if self.eval_version is not None:\n            from nuscenes.eval.detection.config import config_factory\n            self.eval_detection_configs = config_factory(self.eval_version)\n        if self.modality is None:\n            
self.modality = dict(\n                use_camera=True,\n                use_lidar=False,\n                use_radar=False,\n                use_map=False,\n                use_external=False)\n\n    def pre_pipeline(self, results):\n        \"\"\"Initialization before data preparation.\n\n        Args:\n            results (dict): Dict before data preprocessing.\n\n                - img_fields (list): Image fields.\n                - bbox3d_fields (list): 3D bounding boxes fields.\n                - pts_mask_fields (list): Mask fields of points.\n                - pts_seg_fields (list): Mask fields of point segments.\n                - bbox_fields (list): Fields of bounding boxes.\n                - mask_fields (list): Fields of masks.\n                - seg_fields (list): Segment fields.\n                - box_type_3d (str): 3D box type.\n                - box_mode_3d (str): 3D box mode.\n        \"\"\"\n        results['img_prefix'] = self.img_prefix\n        results['seg_prefix'] = self.seg_prefix\n        results['proposal_file'] = self.proposal_file\n        results['img_fields'] = []\n        results['bbox3d_fields'] = []\n        results['pts_mask_fields'] = []\n        results['pts_seg_fields'] = []\n        results['bbox_fields'] = []\n        results['mask_fields'] = []\n        results['seg_fields'] = []\n        results['box_type_3d'] = self.box_type_3d\n        results['box_mode_3d'] = self.box_mode_3d\n\n    def _parse_ann_info(self, img_info, ann_info):\n        \"\"\"Parse bbox annotation.\n\n        Args:\n            img_info (list[dict]): Image info.\n            ann_info (list[dict]): Annotation info of an image.\n\n        Returns:\n            dict: A dict containing the following keys: bboxes, labels,\n                gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d,\n                depths, bboxes_ignore, masks, seg_map\n        \"\"\"\n        gt_bboxes = []\n        gt_labels = []\n        attr_labels = []\n        gt_bboxes_ignore = []\n        gt_masks_ann = []\n        gt_bboxes_cam3d = []\n        centers2d = []\n        depths = []\n        for i, ann in enumerate(ann_info):\n            if ann.get('ignore', False):\n                continue\n            x1, y1, w, h = ann['bbox']\n            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))\n            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))\n            if inter_w * inter_h == 0:\n                continue\n            if ann['area'] <= 0 or w < 1 or h < 1:\n                continue\n            if ann['category_id'] not in self.cat_ids:\n                continue\n            bbox = [x1, y1, x1 + w, y1 + h]\n            if ann.get('iscrowd', False):\n                gt_bboxes_ignore.append(bbox)\n            else:\n                gt_bboxes.append(bbox)\n                gt_labels.append(self.cat2label[ann['category_id']])\n                attr_labels.append(ann['attribute_id'])\n                gt_masks_ann.append(ann.get('segmentation', None))\n                # 3D annotations in camera coordinates\n                bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(1, -1)\n                velo_cam3d = np.array(ann['velo_cam3d']).reshape(1, 2)\n                nan_mask = np.isnan(velo_cam3d[:, 0])\n                velo_cam3d[nan_mask] = [0.0, 0.0]\n                bbox_cam3d = np.concatenate([bbox_cam3d, velo_cam3d], axis=-1)\n                gt_bboxes_cam3d.append(bbox_cam3d.squeeze())\n                # 2.5D annotations in camera coordinates\n                center2d = 
ann['center2d'][:2]\n                depth = ann['center2d'][2]\n                centers2d.append(center2d)\n                depths.append(depth)\n\n        if gt_bboxes:\n            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)\n            gt_labels = np.array(gt_labels, dtype=np.int64)\n            attr_labels = np.array(attr_labels, dtype=np.int64)\n        else:\n            gt_bboxes = np.zeros((0, 4), dtype=np.float32)\n            gt_labels = np.array([], dtype=np.int64)\n            attr_labels = np.array([], dtype=np.int64)\n\n        if gt_bboxes_cam3d:\n            gt_bboxes_cam3d = np.array(gt_bboxes_cam3d, dtype=np.float32)\n            centers2d = np.array(centers2d, dtype=np.float32)\n            depths = np.array(depths, dtype=np.float32)\n        else:\n            gt_bboxes_cam3d = np.zeros((0, self.bbox_code_size),\n                                       dtype=np.float32)\n            centers2d = np.zeros((0, 2), dtype=np.float32)\n            depths = np.zeros((0), dtype=np.float32)\n\n        gt_bboxes_cam3d = CameraInstance3DBoxes(\n            gt_bboxes_cam3d,\n            box_dim=gt_bboxes_cam3d.shape[-1],\n            origin=(0.5, 0.5, 0.5))\n        gt_labels_3d = copy.deepcopy(gt_labels)\n\n        if gt_bboxes_ignore:\n            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)\n        else:\n            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)\n\n        seg_map = img_info['filename'].replace('jpg', 'png')\n\n        ann = dict(\n            bboxes=gt_bboxes,\n            labels=gt_labels,\n            gt_bboxes_3d=gt_bboxes_cam3d,\n            gt_labels_3d=gt_labels_3d,\n            attr_labels=attr_labels,\n            centers2d=centers2d,\n            depths=depths,\n            bboxes_ignore=gt_bboxes_ignore,\n            masks=gt_masks_ann,\n            seg_map=seg_map)\n\n        return ann\n\n    def get_attr_name(self, attr_idx, label_name):\n        \"\"\"Get attribute from predicted index.\n\n        This is a workaround to predict attribute when the predicted velocity\n        is not reliable. We map the predicted attribute index to the one\n        in the attribute set. If it is consistent with the category, we will\n        keep it. 
Otherwise, we will use the default attribute.\n\n        Args:\n            attr_idx (int): Attribute index.\n            label_name (str): Predicted category name.\n\n        Returns:\n            str: Predicted attribute name.\n        \"\"\"\n        # TODO: Simplify the variable name\n        AttrMapping_rev2 = [\n            'cycle.with_rider', 'cycle.without_rider', 'pedestrian.moving',\n            'pedestrian.standing', 'pedestrian.sitting_lying_down',\n            'vehicle.moving', 'vehicle.parked', 'vehicle.stopped', 'None'\n        ]\n        if label_name == 'car' or label_name == 'bus' \\\n            or label_name == 'truck' or label_name == 'trailer' \\\n                or label_name == 'construction_vehicle':\n            if AttrMapping_rev2[attr_idx] == 'vehicle.moving' or \\\n                AttrMapping_rev2[attr_idx] == 'vehicle.parked' or \\\n                    AttrMapping_rev2[attr_idx] == 'vehicle.stopped':\n                return AttrMapping_rev2[attr_idx]\n            else:\n                return NuScenesMonoDataset.DefaultAttribute[label_name]\n        elif label_name == 'pedestrian':\n            if AttrMapping_rev2[attr_idx] == 'pedestrian.moving' or \\\n                AttrMapping_rev2[attr_idx] == 'pedestrian.standing' or \\\n                    AttrMapping_rev2[attr_idx] == \\\n                    'pedestrian.sitting_lying_down':\n                return AttrMapping_rev2[attr_idx]\n            else:\n                return NuScenesMonoDataset.DefaultAttribute[label_name]\n        elif label_name == 'bicycle' or label_name == 'motorcycle':\n            if AttrMapping_rev2[attr_idx] == 'cycle.with_rider' or \\\n                    AttrMapping_rev2[attr_idx] == 'cycle.without_rider':\n                return AttrMapping_rev2[attr_idx]\n            else:\n                return NuScenesMonoDataset.DefaultAttribute[label_name]\n        else:\n            return NuScenesMonoDataset.DefaultAttribute[label_name]\n\n    def _format_bbox(self, results, jsonfile_prefix=None):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. 
Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n        nusc_annos = {}\n        mapped_class_names = self.CLASSES\n\n        print('Start to convert detection format...')\n\n        CAM_NUM = 6\n\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n\n            if sample_id % CAM_NUM == 0:\n                boxes_per_frame = []\n                attrs_per_frame = []\n\n            # need to merge results from images of the same sample\n            annos = []\n            boxes, attrs = output_to_nusc_box(det)\n            sample_token = self.data_infos[sample_id]['token']\n            boxes, attrs = cam_nusc_box_to_global(self.data_infos[sample_id],\n                                                  boxes, attrs,\n                                                  mapped_class_names,\n                                                  self.eval_detection_configs,\n                                                  self.eval_version)\n\n            boxes_per_frame.extend(boxes)\n            attrs_per_frame.extend(attrs)\n            # Remove redundant predictions caused by overlap of images\n            if (sample_id + 1) % CAM_NUM != 0:\n                continue\n            boxes = global_nusc_box_to_cam(\n                self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame,\n                mapped_class_names, self.eval_detection_configs,\n                self.eval_version)\n            cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)\n            # box nms 3d over 6 images in a frame\n            # TODO: move this global setting into config\n            nms_cfg = dict(\n                use_rotate_nms=True,\n                nms_across_levels=False,\n                nms_pre=4096,\n                nms_thr=0.05,\n                score_thr=0.01,\n                min_bbox_size=0,\n                max_per_frame=500)\n            from mmcv import Config\n            nms_cfg = Config(nms_cfg)\n            cam_boxes3d_for_nms = xywhr2xyxyr(cam_boxes3d.bev)\n            boxes3d = cam_boxes3d.tensor\n            # generate attr scores from attr labels\n            attrs = labels.new_tensor([attr for attr in attrs_per_frame])\n            boxes3d, scores, labels, attrs = box3d_multiclass_nms(\n                boxes3d,\n                cam_boxes3d_for_nms,\n                scores,\n                nms_cfg.score_thr,\n                nms_cfg.max_per_frame,\n                nms_cfg,\n                mlvl_attr_scores=attrs)\n            cam_boxes3d = CameraInstance3DBoxes(boxes3d, box_dim=9)\n            det = bbox3d2result(cam_boxes3d, scores, labels, attrs)\n            boxes, attrs = output_to_nusc_box(det)\n            boxes, attrs = cam_nusc_box_to_global(\n                self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs,\n                mapped_class_names, self.eval_detection_configs,\n                self.eval_version)\n\n            for i, box in enumerate(boxes):\n                name = mapped_class_names[box.label]\n                attr = self.get_attr_name(attrs[i], name)\n                nusc_anno = dict(\n                    sample_token=sample_token,\n                    translation=box.center.tolist(),\n                    size=box.wlh.tolist(),\n                    rotation=box.orientation.elements.tolist(),\n                    velocity=box.velocity[:2].tolist(),\n                    detection_name=name,\n                    detection_score=box.score,\n                    
attribute_name=attr)\n                annos.append(nusc_anno)\n            # other views results of the same frame should be concatenated\n            if sample_token in nusc_annos:\n                nusc_annos[sample_token].extend(annos)\n            else:\n                nusc_annos[sample_token] = annos\n\n        nusc_submissions = {\n            'meta': self.modality,\n            'results': nusc_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')\n        print('Results writes to', res_path)\n        mmcv.dump(nusc_submissions, res_path)\n        return res_path\n\n    def _evaluate_single(self,\n                         result_path,\n                         logger=None,\n                         metric='bbox',\n                         result_name='img_bbox'):\n        \"\"\"Evaluation for a single model in nuScenes protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            metric (str, optional): Metric name used for evaluation.\n                Default: 'bbox'.\n            result_name (str, optional): Result name in the metric prefix.\n                Default: 'img_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n        from nuscenes import NuScenes\n        from nuscenes.eval.detection.evaluate import NuScenesEval\n\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n        nusc = NuScenes(\n            version=self.version, dataroot=self.data_root, verbose=False)\n        eval_set_map = {\n            'v1.0-mini': 'mini_val',\n            'v1.0-trainval': 'val',\n        }\n        nusc_eval = NuScenesEval(\n            nusc,\n            config=self.eval_detection_configs,\n            result_path=result_path,\n            eval_set=eval_set_map[self.version],\n            output_dir=output_dir,\n            verbose=False)\n        nusc_eval.main(render_curves=True)\n\n        # record metrics\n        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))\n        detail = dict()\n        metric_prefix = f'{result_name}_NuScenes'\n        for name in self.CLASSES:\n            for k, v in metrics['label_aps'][name].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val\n            for k, v in metrics['label_tp_errors'][name].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val\n            for k, v in metrics['tp_errors'].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}'.format(metric_prefix,\n                                      self.ErrNameMapping[k])] = val\n\n        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']\n        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']\n        return detail\n\n    def format_results(self, results, jsonfile_prefix=None, **kwargs):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[tuple | numpy.ndarray]): Testing results of the\n                dataset.\n            jsonfile_prefix (str): The prefix of json files. 
It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n\n        Returns:\n            tuple: (result_files, tmp_dir), result_files is a dict containing\n                the json filepaths, tmp_dir is the temporal directory created\n                for saving json files when jsonfile_prefix is not specified.\n        \"\"\"\n        assert isinstance(results, list), 'results must be a list'\n        assert len(results) == len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        # currently the output prediction results could be in two formats\n        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)\n        # 2. list of dict('pts_bbox' or 'img_bbox':\n        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))\n        # this is a workaround to enable evaluation of both formats on nuScenes\n        # refer to https://github.com/open-mmlab/mmdetection3d/issues/449\n        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):\n            result_files = self._format_bbox(results, jsonfile_prefix)\n        else:\n            # should take the inner dict out of 'pts_bbox' or 'img_bbox' dict\n            result_files = dict()\n            for name in results[0]:\n                # not evaluate 2D predictions on nuScenes\n                if '2d' in name:\n                    continue\n                print(f'\\nFormating bboxes of {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_files.update(\n                    {name: self._format_bbox(results_, tmp_file_)})\n\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric='bbox',\n                 logger=None,\n                 jsonfile_prefix=None,\n                 result_names=['img_bbox'],\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in nuScenes protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Default: 'bbox'.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            result_names (list[str], optional): Result names in the\n                metric prefix. 
Default: ['img_bbox'].\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n\n        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:\n                print('Evaluating bboxes of {}'.format(name))\n                ret_dict = self._evaluate_single(result_files[name])\n            results_dict.update(ret_dict)\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_single(result_files)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show or out_dir:\n            self.show(results, out_dir, pipeline=pipeline)\n        return results_dict\n\n    def _extract_data(self, index, pipeline, key, load_annos=False):\n        \"\"\"Load data using input pipeline and extract data according to key.\n\n        Args:\n            index (int): Index for accessing the target data.\n            pipeline (:obj:`Compose`): Composed data loading pipeline.\n            key (str | list[str]): One single or a list of data key.\n            load_annos (bool): Whether to load data annotations.\n                If True, need to set self.test_mode as False before loading.\n\n        Returns:\n            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:\n                A single or a list of loaded data.\n        \"\"\"\n        assert pipeline is not None, 'data loading pipeline is not provided'\n        img_info = self.data_infos[index]\n        input_dict = dict(img_info=img_info)\n\n        if load_annos:\n            ann_info = self.get_ann_info(index)\n            input_dict.update(dict(ann_info=ann_info))\n\n        self.pre_pipeline(input_dict)\n        example = pipeline(input_dict)\n\n        # extract data items according to keys\n        if isinstance(key, str):\n            data = extract_result_dict(example, key)\n        else:\n            data = [extract_result_dict(example, k) for k in key]\n\n        return data\n\n    def _get_pipeline(self, pipeline):\n        \"\"\"Get data loading pipeline in self.show/evaluate function.\n\n        Args:\n            pipeline (list[dict]): Input pipeline. 
If None is given,\n                get from self.pipeline.\n        \"\"\"\n        if pipeline is None:\n            if not hasattr(self, 'pipeline') or self.pipeline is None:\n                warnings.warn(\n                    'Use default pipeline for data loading, this may cause '\n                    'errors when data is on ceph')\n                return self._build_default_pipeline()\n            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)\n            return Compose(loading_pipeline)\n        return Compose(pipeline)\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(type='LoadImageFromFileMono3D'),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['img'])\n        ]\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=False, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Whether to visualize the results online.\n                Default: False.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            if 'img_bbox' in result.keys():\n                result = result['img_bbox']\n            data_info = self.data_infos[i]\n            img_path = data_info['file_name']\n            file_name = osp.split(img_path)[-1].split('.')[0]\n            img, img_metas = self._extract_data(i, pipeline,\n                                                ['img', 'img_metas'])\n            # need to transpose channel to first dim\n            img = img.numpy().transpose(1, 2, 0)\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d']\n            pred_bboxes = result['boxes_3d']\n            show_multi_modality_result(\n                img,\n                gt_bboxes,\n                pred_bboxes,\n                img_metas['cam2img'],\n                out_dir,\n                file_name,\n                box_mode='camera',\n                show=show)\n\n\ndef output_to_nusc_box(detection):\n    \"\"\"Convert the output to the box class in the nuScenes.\n\n    Args:\n        detection (dict): Detection results.\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.\n            - scores_3d (torch.Tensor): Detection scores.\n            - labels_3d (torch.Tensor): Predicted box labels.\n            - attrs_3d (torch.Tensor, optional): Predicted attributes.\n\n    Returns:\n        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.\n    \"\"\"\n    box3d = detection['boxes_3d']\n    scores = detection['scores_3d'].numpy()\n    labels = detection['labels_3d'].numpy()\n    attrs = None\n    if 'attrs_3d' in detection:\n        attrs = detection['attrs_3d'].numpy()\n\n    box_gravity_center = box3d.gravity_center.numpy()\n    box_dims = box3d.dims.numpy()\n    box_yaw = box3d.yaw.numpy()\n\n    # convert the dim/rot to nuscbox convention\n    box_dims[:, [0, 1, 2]] = box_dims[:, [2, 0, 1]]\n    box_yaw = -box_yaw\n\n    box_list = []\n    for i in range(len(box3d)):\n        q1 = 
pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])\n        q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)\n        quat = q2 * q1\n        velocity = (box3d.tensor[i, 7], 0.0, box3d.tensor[i, 8])\n        box = NuScenesBox(\n            box_gravity_center[i],\n            box_dims[i],\n            quat,\n            label=labels[i],\n            score=scores[i],\n            velocity=velocity)\n        box_list.append(box)\n    return box_list, attrs\n\n\ndef cam_nusc_box_to_global(info,\n                           boxes,\n                           attrs,\n                           classes,\n                           eval_configs,\n                           eval_version='detection_cvpr_2019'):\n    \"\"\"Convert the box from camera to global coordinate.\n\n    Args:\n        info (dict): Info for a specific sample data, including the\n            calibration information.\n        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.\n        classes (list[str]): Mapped classes in the evaluation.\n        eval_configs (object): Evaluation configuration object.\n        eval_version (str, optional): Evaluation version.\n            Default: 'detection_cvpr_2019'\n\n    Returns:\n        list: List of standard NuScenesBoxes in the global\n            coordinate.\n    \"\"\"\n    box_list = []\n    attr_list = []\n    for (box, attr) in zip(boxes, attrs):\n        # Move box to ego vehicle coord system\n        box.rotate(pyquaternion.Quaternion(info['cam2ego_rotation']))\n        box.translate(np.array(info['cam2ego_translation']))\n        # filter det in ego.\n        cls_range_map = eval_configs.class_range\n        radius = np.linalg.norm(box.center[:2], 2)\n        det_range = cls_range_map[classes[box.label]]\n        if radius > det_range:\n            continue\n        # Move box to global coord system\n        box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))\n        box.translate(np.array(info['ego2global_translation']))\n        box_list.append(box)\n        attr_list.append(attr)\n    return box_list, attr_list\n\n\ndef global_nusc_box_to_cam(info,\n                           boxes,\n                           classes,\n                           eval_configs,\n                           eval_version='detection_cvpr_2019'):\n    \"\"\"Convert the box from global to camera coordinate.\n\n    Args:\n        info (dict): Info for a specific sample data, including the\n            calibration information.\n        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.\n        classes (list[str]): Mapped classes in the evaluation.\n        eval_configs (object): Evaluation configuration object.\n        eval_version (str, optional): Evaluation version.\n            Default: 'detection_cvpr_2019'\n\n    Returns:\n        list: List of standard NuScenesBoxes in the global\n            coordinate.\n    \"\"\"\n    box_list = []\n    for box in boxes:\n        # Move box to ego vehicle coord system\n        box.translate(-np.array(info['ego2global_translation']))\n        box.rotate(\n            pyquaternion.Quaternion(info['ego2global_rotation']).inverse)\n        # filter det in ego.\n        cls_range_map = eval_configs.class_range\n        radius = np.linalg.norm(box.center[:2], 2)\n        det_range = cls_range_map[classes[box.label]]\n        if radius > det_range:\n            continue\n        # Move box to camera coord system\n        box.translate(-np.array(info['cam2ego_translation']))\n        
box.rotate(pyquaternion.Quaternion(info['cam2ego_rotation']).inverse)\n        box_list.append(box)\n    return box_list\n\n\ndef nusc_box_to_cam_box3d(boxes):\n    \"\"\"Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.\n\n    Args:\n        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.\n\n    Returns:\n        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):\n            Converted 3D bounding boxes, scores and labels.\n    \"\"\"\n    locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)\n    dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)\n    rots = torch.Tensor([b.orientation.yaw_pitch_roll[0]\n                         for b in boxes]).view(-1, 1)\n    velocity = torch.Tensor([b.velocity[0::2] for b in boxes]).view(-1, 2)\n\n    # convert nusbox to cambox convention\n    dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]]\n    rots = -rots\n\n    boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda()\n    cam_boxes3d = CameraInstance3DBoxes(\n        boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5))\n    scores = torch.Tensor([b.score for b in boxes]).cuda()\n    labels = torch.LongTensor([b.label for b in boxes]).cuda()\n    nms_scores = scores.new_zeros(scores.shape[0], 10 + 1)\n    indices = labels.new_tensor(list(range(scores.shape[0])))\n    nms_scores[indices, labels] = scores\n    return cam_boxes3d, nms_scores, labels\n"
  },
  {
    "path": "mmdet3d/datasets/occ_metrics.py",
    "content": "import numpy as np\nimport os\nfrom pathlib import Path\nfrom tqdm import tqdm\nimport pickle as pkl\nimport argparse\nimport time\nimport torch\nimport sys, platform\nfrom sklearn.neighbors import KDTree\nfrom termcolor import colored\nfrom pathlib import Path\nfrom copy import deepcopy\nfrom functools import reduce\n\nnp.seterr(divide='ignore', invalid='ignore')\nos.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n\n\ndef pcolor(string, color, on_color=None, attrs=None):\n    \"\"\"\n    Produces a colored string for printing\n    Parameters\n    ----------\n    string : str\n        String that will be colored\n    color : str\n        Color to use\n    on_color : str\n        Background color to use\n    attrs : list of str\n        Different attributes for the string\n    Returns\n    -------\n    string: str\n        Colored string\n    \"\"\"\n    return colored(string, color, on_color, attrs)\n\n\ndef getCellCoordinates(points, voxelSize):\n    return (points / voxelSize).astype(np.int)\n\n\ndef getNumUniqueCells(cells):\n    M = cells.max() + 1\n    return np.unique(cells[:, 0] + M * cells[:, 1] + M ** 2 * cells[:, 2]).shape[0]\n\n\nclass Metric_mIoU():\n    def __init__(self,\n                 save_dir='.',\n                 num_classes=18,\n                 use_lidar_mask=False,\n                 use_image_mask=False,\n                 min_d = -1,\n                 max_d = 100,\n                 ):\n        self.class_names = ['others','barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',\n                            'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',\n                            'driveable_surface', 'other_flat', 'sidewalk',\n                            'terrain', 'manmade', 'vegetation','free']\n        self.save_dir = save_dir\n        self.use_lidar_mask = use_lidar_mask\n        self.use_image_mask = use_image_mask\n        self.num_classes = num_classes\n\n        self.point_cloud_range = [-40.0, -40.0, -1.0, 40.0, 40.0, 5.4]\n        self.occupancy_size = [0.4, 0.4, 0.4]\n        self.voxel_size = 0.4\n        self.occ_xdim = int((self.point_cloud_range[3] - self.point_cloud_range[0]) / self.occupancy_size[0])\n        self.occ_ydim = int((self.point_cloud_range[4] - self.point_cloud_range[1]) / self.occupancy_size[1])\n        self.occ_zdim = int((self.point_cloud_range[5] - self.point_cloud_range[2]) / self.occupancy_size[2])\n        self.voxel_num = self.occ_xdim * self.occ_ydim * self.occ_zdim\n        self.hist = np.zeros((self.num_classes, self.num_classes))\n        self.cnt = 0\n        self.max_d = max_d\n        self.min_d = min_d\n\n    def hist_info(self, n_cl, pred, gt):\n        \"\"\"\n        build confusion matrix\n        # empty classes:0\n        non-empty class: 0-16\n        free voxel class: 17\n        Args:\n            n_cl (int): num_classes_occupancy\n            pred (1-d array): pred_occupancy_label\n            gt (1-d array): gt_occupancu_label\n        Returns:\n            tuple:(hist, correctly number_predicted_labels, num_labelled_sample)\n        \"\"\"\n\n        assert pred.shape == gt.shape\n        k = (gt >= 0) & (gt < n_cl)  # exclude 255\n        labeled = np.sum(k)\n        correct = np.sum((pred[k] == gt[k]))\n\n        return (\n            np.bincount(\n                n_cl * gt[k].astype(int) + pred[k].astype(int), minlength=n_cl ** 2\n            ).reshape(n_cl, n_cl),\n            correct,\n            labeled,\n        )\n\n    def per_class_iu(self, hist):\n\n        
return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))\n\n    def compute_mIoU(self, pred, label, n_classes):\n        hist = np.zeros((n_classes, n_classes))\n        new_hist, correct, labeled = self.hist_info(n_classes, pred.flatten(), label.flatten())\n        hist += new_hist\n        mIoUs = self.per_class_iu(hist)\n        # for ind_class in range(n_classes):\n        #     print(str(round(mIoUs[ind_class] * 100, 2)))\n        # print('===> mIoU: ' + str(round(np.nanmean(mIoUs) * 100, 2)))\n        return round(np.nanmean(mIoUs) * 100, 2), hist\n\n\n    def add_batch(self,semantics_pred,semantics_gt,mask_lidar,mask_camera):\n        self.cnt += 1\n        if len(semantics_pred.shape) == 4 or len(semantics_pred.shape) == 2:\n            semantics_pred = semantics_pred.argmax(-1)\n        \n        if len(semantics_pred.shape) == 1:\n            semantics_pred_ = deepcopy(semantics_gt)\n            semantics_pred_[mask_camera] = semantics_pred\n            semantics_pred = semantics_pred_\n\n\n        xx, yy = np.meshgrid(np.arange(200), np.arange(200))\n        mask = (np.stack([yy, xx], -1) -100) * 0.4\n        distance_map = np.linalg.norm(mask, 2, -1)\n        distance_map = (distance_map<=self.max_d) & (distance_map>=self.min_d)\n        # print(semantics_pred.shape)\n        # from IPython import embed\n        # embed()\n        # exit()\n\n        # semantics_pred = semantics_pred[distance_map]\n        # semantics_gt = semantics_gt[distance_map]\n        # mask_camera = mask_camera[distance_map]\n        mask_camera = mask_camera & distance_map[:,:, None]\n\n        assert self.use_image_mask\n        if self.use_image_mask:\n            masked_semantics_gt = semantics_gt[mask_camera]\n            if len(semantics_pred.shape) == 3:\n                masked_semantics_pred = semantics_pred[mask_camera]\n            elif len(semantics_pred.shape) == 1:\n                masked_semantics_pred = semantics_pred\n           \n        elif self.use_lidar_mask:\n            masked_semantics_gt = semantics_gt[mask_lidar]\n            masked_semantics_pred = semantics_pred[mask_lidar]\n        else:\n            masked_semantics_gt = semantics_gt\n            masked_semantics_pred = semantics_pred\n\n            # # pred = np.random.randint(low=0, high=17, size=masked_semantics.shape)\n        _, _hist = self.compute_mIoU(masked_semantics_pred, masked_semantics_gt, self.num_classes)\n        self.hist += _hist\n\n    def count_miou(self):\n        res = {}\n        mIoU = self.per_class_iu(self.hist)\n        # assert cnt == num_samples, 'some samples are not included in the miou calculation'\n        print(f'===> per class IoU of {self.cnt} samples:')\n        for ind_class in range(self.num_classes-1):\n            print(f'===> {self.class_names[ind_class]} - IoU = ' + str(round(mIoU[ind_class] * 100, 4)))\n            res[self.class_names[ind_class]] = round(mIoU[ind_class] * 100, 2)\n\n        print(f'===> mIoU of {self.cnt} samples: ' + str(round(np.nanmean(mIoU[:self.num_classes-1]) * 100, 2)))\n        res['Overall'] =  round(np.nanmean(mIoU[:self.num_classes-1]) * 100, 2)\n        # print(f'===> sample-wise averaged mIoU of {cnt} samples: ' + str(round(np.nanmean(mIoU_avg), 2)))\n\n        return res\n\n\nclass Metric_FScore():\n    def __init__(self,\n\n                 leaf_size=10,\n                 threshold_acc=0.6,\n                 threshold_complete=0.6,\n                 voxel_size=[0.4, 0.4, 0.4],\n                 range=[-40, -40, -1, 40, 40, 5.4],\n         
        void=[17, 255],\n                 use_lidar_mask=False,\n                 use_image_mask=False, ) -> None:\n\n        self.leaf_size = leaf_size\n        self.threshold_acc = threshold_acc\n        self.threshold_complete = threshold_complete\n        self.voxel_size = voxel_size\n        self.range = range\n        self.void = void\n        self.use_lidar_mask = use_lidar_mask\n        self.use_image_mask = use_image_mask\n        self.cnt=0\n        self.tot_acc = 0.\n        self.tot_cmpl = 0.\n        self.tot_f1_mean = 0.\n        self.eps = 1e-8\n\n\n\n    def voxel2points(self, voxel):\n        # occIdx = torch.where(torch.logical_and(voxel != FREE, voxel != NOT_OBSERVED))\n        # if isinstance(voxel, np.ndarray): voxel = torch.from_numpy(voxel)\n        mask = np.logical_not(reduce(np.logical_or, [voxel == self.void[i] for i in range(len(self.void))]))\n        occIdx = np.where(mask)\n\n        points = np.concatenate((occIdx[0][:, None] * self.voxel_size[0] + self.voxel_size[0] / 2 + self.range[0], \\\n                                 occIdx[1][:, None] * self.voxel_size[1] + self.voxel_size[1] / 2 + self.range[1], \\\n                                 occIdx[2][:, None] * self.voxel_size[2] + self.voxel_size[2] / 2 + self.range[2]),\n                                axis=1)\n        return points\n\n    def add_batch(self,semantics_pred,semantics_gt,mask_lidar,mask_camera ):\n\n        # for scene_token in tqdm(preds_dict.keys()):\n        self.cnt += 1\n        \n        if len(semantics_pred.shape) == 4 or len(semantics_pred.shape) == 2:\n            semantics_pred = semantics_pred.argmax(-1)\n        \n        assert self.use_image_mask\n\n        if self.use_image_mask:\n            \n            semantics_gt[mask_camera == False] = 255\n            if len(semantics_pred.shape) == 1:\n                semantics_pred_ = deepcopy(semantics_gt)\n                semantics_pred_[mask_camera] = semantics_pred\n                semantics_pred = semantics_pred_\n            else:\n                semantics_pred[mask_camera == False] = 255\n        elif self.use_lidar_mask:\n            semantics_gt[mask_lidar == False] = 255\n            semantics_pred[mask_lidar == False] = 255\n        else:\n            pass\n\n        ground_truth = self.voxel2points(semantics_gt)\n        prediction = self.voxel2points(semantics_pred)\n        if prediction.shape[0] == 0:\n            accuracy=0\n            completeness=0\n            fmean=0\n\n        else:\n            prediction_tree = KDTree(prediction, leaf_size=self.leaf_size)\n            ground_truth_tree = KDTree(ground_truth, leaf_size=self.leaf_size)\n            complete_distance, _ = prediction_tree.query(ground_truth)\n            complete_distance = complete_distance.flatten()\n\n            accuracy_distance, _ = ground_truth_tree.query(prediction)\n            accuracy_distance = accuracy_distance.flatten()\n\n            # evaluate completeness\n            complete_mask = complete_distance < self.threshold_complete\n            completeness = complete_mask.mean()\n\n            # evalute accuracy\n            accuracy_mask = accuracy_distance < self.threshold_acc\n            accuracy = accuracy_mask.mean()\n\n            fmean = 2.0 / (1 / (accuracy+self.eps) + 1 / (completeness+self.eps))\n\n        self.tot_acc += accuracy\n        self.tot_cmpl += completeness\n        self.tot_f1_mean += fmean\n\n    def count_fscore(self,):\n        res = {}\n        base_color, attrs = 'red', ['bold', 'dark']\n        
print(pcolor('\\n######## F score: {} #######'.format(self.tot_f1_mean / self.cnt), base_color, attrs=attrs))\n        res['f-score'] = round(self.tot_f1_mean / self.cnt, 4)\n        return res\n\n\nimport argparse\nimport os \nimport sys\nimport numpy as np\nfrom tqdm import tqdm\nimport time\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='eval occupancy')\n    parser.add_argument('pred_path', help='pred_path')\n    parser.add_argument('--gt_path', default='/mount/data/occupancy_cvpr2023/gts', help='checkpoint file')\n    parser.add_argument('--min_d', default=-1, type=int, help='min range')\n    parser.add_argument('--max_d', default=100, type=int, help='max range')\n    parser.add_argument(\n        '--eval_fscore',\n        action='store_true',\n        help='whether to eval f-score.')\n    args = parser.parse_args()\n    return args\n\ndef eval(args):\n    occ_eval_metrics = Metric_mIoU(\n            num_classes=18,\n            use_lidar_mask=False,\n            min_d = args.min_d,\n            max_d = args.max_d,\n            use_image_mask=True)\n    if args.eval_fscore:\n        fscore_eval_metrics = Metric_FScore(\n                leaf_size=10,\n                threshold_acc=0.4,\n                threshold_complete=0.4,\n                voxel_size=[0.4, 0.4, 0.4],\n                range=[-40, -40, -1, 40, 40, 5.4],\n                void=[17, 255],\n                use_lidar_mask=False,\n                use_image_mask=True,)\n    # print(len(os.listdir(args.pred_path)))\n    pred_files = os.listdir(args.pred_path)\n    val_splits = ['scene-0003', 'scene-0012', 'scene-0013', 'scene-0014', 'scene-0015', 'scene-0016', 'scene-0017', 'scene-0018', 'scene-0035', 'scene-0036', 'scene-0038', 'scene-0039', 'scene-0092', 'scene-0093', 'scene-0094', 'scene-0095', 'scene-0096', 'scene-0097', 'scene-0098', 'scene-0099', 'scene-0100', 'scene-0101', 'scene-0102', 'scene-0103', 'scene-0104', 'scene-0105', 'scene-0106', 'scene-0107', 'scene-0108', 'scene-0109', 'scene-0110', 'scene-0221', 'scene-0268', 'scene-0269', 'scene-0270', 'scene-0271', 'scene-0272', 'scene-0273', 'scene-0274', 'scene-0275', 'scene-0276', 'scene-0277', 'scene-0278', 'scene-0329', 'scene-0330', 'scene-0331', 'scene-0332', 'scene-0344', 'scene-0345', 'scene-0346', 'scene-0519', 'scene-0520', 'scene-0521', 'scene-0522', 'scene-0523', 'scene-0524', 'scene-0552', 'scene-0553', 'scene-0554', 'scene-0555', 'scene-0556', 'scene-0557', 'scene-0558', 'scene-0559', 'scene-0560', 'scene-0561', 'scene-0562', 'scene-0563', 'scene-0564', 'scene-0565', 'scene-0625', 'scene-0626', 'scene-0627', 'scene-0629', 'scene-0630', 'scene-0632', 'scene-0633', 'scene-0634', 'scene-0635', 'scene-0636', 'scene-0637', 'scene-0638', 'scene-0770', 'scene-0771', 'scene-0775', 'scene-0777', 'scene-0778', 'scene-0780', 'scene-0781', 'scene-0782', 'scene-0783', 'scene-0784', 'scene-0794', 'scene-0795', 'scene-0796', 'scene-0797', 'scene-0798', 'scene-0799', 'scene-0800', 'scene-0802', 'scene-0904', 'scene-0905', 'scene-0906', 'scene-0907', 'scene-0908', 'scene-0909', 'scene-0910', 'scene-0911', 'scene-0912', 'scene-0913', 'scene-0914', 'scene-0915', 'scene-0916', 'scene-0917', 'scene-0919', 'scene-0920', 'scene-0921', 'scene-0922', 'scene-0923', 'scene-0924', 'scene-0925', 'scene-0926', 'scene-0927', 'scene-0928', 'scene-0929', 'scene-0930', 'scene-0931', 'scene-0962', 'scene-0963', 'scene-0966', 'scene-0967', 'scene-0968', 'scene-0969', 'scene-0971', 'scene-0972', 'scene-1059', 'scene-1060', 'scene-1061', 'scene-1062', 
'scene-1063', 'scene-1064', 'scene-1065', 'scene-1066', 'scene-1067', 'scene-1068', 'scene-1069', 'scene-1070', 'scene-1071', 'scene-1072', 'scene-1073']\n    mini_splits = ['scene-0103', 'scene-0916']\n    for scene_name in tqdm(val_splits):\n        for sample_token in os.listdir(os.path.join(args.gt_path, scene_name)):\n            occ_gt = np.load(os.path.join(args.gt_path, scene_name, sample_token, 'labels.npz'))\n            occ_pred = np.load(os.path.join(args.pred_path, scene_name+'_'+sample_token+'.npz'))['pred']\n            gt_semantics = occ_gt['semantics']\n            mask_lidar = occ_gt['mask_lidar'].astype(bool)\n            mask_camera = occ_gt['mask_camera'].astype(bool)\n            occ_eval_metrics.add_batch(occ_pred, gt_semantics, mask_lidar, mask_camera)\n            if args.eval_fscore:\n                fscore_eval_metrics.add_batch(occ_pred, gt_semantics, mask_lidar, mask_camera)\n    res = occ_eval_metrics.count_miou()\n    if args.eval_fscore:\n        fscore_eval_metrics.count_fscore()\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    eval(args)\n"
  },
  {
    "path": "mmdet3d/datasets/occupancy_eval.py",
    "content": "from .occ_metrics import Metric_mIoU, Metric_FScore\nimport argparse\nimport os \nimport sys\nimport nunmpy as np\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='eval occupancy')\n    parser.add_argument('pred_path', help='pred_path')\n    parser.add_argument('--gt', default='/mount/data/occupancy_cvpr2023/gts', help='checkpoint file')\n    parser.add_argument(\n        '--eval_fscore',\n        action='store_true',\n        help='whether to eval f-score.')\n    args = parser.parse_args()\n    return args\n\ndef eval(args):\n    occ_eval_metrics = Metric_mIoU(\n            num_classes=18,\n            use_lidar_mask=False,\n            use_image_mask=True)\n    if args.eval_fscore:\n        fscore_eval_metrics = Metric_FScore(\n                leaf_size=10,\n                threshold_acc=0.4,\n                threshold_complete=0.4,\n                voxel_size=[0.4, 0.4, 0.4],\n                range=[-40, -40, -1, 40, 40, 5.4],\n                void=[17, 255],\n                use_lidar_mask=False,\n                use_image_mask=True,)\n    for pred_path in os.listdir(args.pred_path):\n        occ_pred = np.load(os.path.join(args.pred_path, pred_path))['pred']\n        occ_gt = np.load(os.path.join(args.gt_path, pred_path.split('.')[0], 'labels.npz'))\n        gt_semantics = occ_gt['semantics']\n        mask_lidar = occ_gt['mask_lidar'].astype(bool)\n        mask_camera = occ_gt['mask_camera'].astype(bool)\n        occ_eval_metrics.add_batch(occ_pred, gt_semantics, mask_lidar, mask_camera)\n        if args.eval_fscore:\n            fscore_eval_metrics.add_batch(occ_pred, gt_semantics, mask_lidar, mask_camera)\n        res = occ_eval_metrics.count_miou()\n        if eval_fscore:\n            fscore_eval_metrics.count_fscore()\n        \n\nif __main__:\n    args = parse_args()\n    eval(args)\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .compose import Compose\nfrom .dbsampler import DataBaseSampler\nfrom .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D\nfrom .loading import (LoadAnnotations3D, LoadAnnotationsBEVDepth,\n                      LoadImageFromFileMono3D, LoadMultiViewImageFromFiles,\n                      LoadPointsFromDict, LoadPointsFromFile,\n                      LoadPointsFromMultiSweeps, NormalizePointsColor,\n                      PointSegClassMapping, PointToMultiViewDepth,\n                      PrepareImageInputs, LoadVectorMap)\nfrom .test_time_aug import MultiScaleFlipAug3D\n# yapf: disable\nfrom .transforms_3d import (AffineResize, BackgroundPointsFilter,\n                            GlobalAlignment, GlobalRotScaleTrans,\n                            IndoorPatchPointSample, IndoorPointSample,\n                            MultiViewWrapper, ObjectNameFilter, ObjectNoise,\n                            ObjectRangeFilter, ObjectSample, PointSample,\n                            PointShuffle, PointsRangeFilter,\n                            RandomDropPointsColor, RandomFlip3D,\n                            RandomJitterPoints, RandomRotate, RandomShiftScale,\n                            RangeLimitedRandomCrop, VoxelBasedPointSampler)\n\n__all__ = [\n    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',\n    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',\n    'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',\n    'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',\n    'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',\n    'PointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D',\n    'LoadPointsFromMultiSweeps', 'BackgroundPointsFilter',\n    'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample',\n    'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor',\n    'RandomJitterPoints', 'AffineResize', 'RandomShiftScale',\n    'LoadPointsFromDict', 'MultiViewWrapper', 'RandomRotate',\n    'RangeLimitedRandomCrop', 'PrepareImageInputs',\n    'LoadAnnotationsBEVDepth', 'PointToMultiViewDepth', 'LoadVectorMap'\n]\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/compose.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport collections\n\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES\nfrom ..builder import PIPELINES\n\n\n@PIPELINES.register_module()\nclass Compose:\n    \"\"\"Compose multiple transforms sequentially. The pipeline registry of\n    mmdet3d separates with mmdet, however, sometimes we may need to use mmdet's\n    pipeline. So the class is rewritten to be able to use pipelines from both\n    mmdet3d and mmdet.\n\n    Args:\n        transforms (Sequence[dict | callable]): Sequence of transform object or\n            config dict to be composed.\n    \"\"\"\n\n    def __init__(self, transforms):\n        assert isinstance(transforms, collections.abc.Sequence)\n        self.transforms = []\n        for transform in transforms:\n            if isinstance(transform, dict):\n                _, key = PIPELINES.split_scope_key(transform['type'])\n                if key in PIPELINES._module_dict.keys():\n                    transform = build_from_cfg(transform, PIPELINES)\n                else:\n                    transform = build_from_cfg(transform, MMDET_PIPELINES)\n                self.transforms.append(transform)\n            elif callable(transform):\n                self.transforms.append(transform)\n            else:\n                raise TypeError('transform must be callable or a dict')\n\n    def __call__(self, data):\n        \"\"\"Call function to apply transforms sequentially.\n\n        Args:\n            data (dict): A result dict contains the data to transform.\n\n        Returns:\n           dict: Transformed data.\n        \"\"\"\n\n        for t in self.transforms:\n            data = t(data)\n            if data is None:\n                return None\n        return data\n\n    def __repr__(self):\n        format_string = self.__class__.__name__ + '('\n        for t in self.transforms:\n            format_string += '\\n'\n            format_string += f'    {t}'\n        format_string += '\\n)'\n        return format_string\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/data_augment_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport numba\nimport numpy as np\nfrom numba.core.errors import NumbaPerformanceWarning\n\nfrom mmdet3d.core.bbox import box_np_ops\n\nwarnings.filterwarnings('ignore', category=NumbaPerformanceWarning)\n\n\n@numba.njit\ndef _rotation_box2d_jit_(corners, angle, rot_mat_T):\n    \"\"\"Rotate 2D boxes.\n\n    Args:\n        corners (np.ndarray): Corners of boxes.\n        angle (float): Rotation angle.\n        rot_mat_T (np.ndarray): Transposed rotation matrix.\n    \"\"\"\n    rot_sin = np.sin(angle)\n    rot_cos = np.cos(angle)\n    rot_mat_T[0, 0] = rot_cos\n    rot_mat_T[0, 1] = rot_sin\n    rot_mat_T[1, 0] = -rot_sin\n    rot_mat_T[1, 1] = rot_cos\n    corners[:] = corners @ rot_mat_T\n\n\n@numba.jit(nopython=True)\ndef box_collision_test(boxes, qboxes, clockwise=True):\n    \"\"\"Box collision test.\n\n    Args:\n        boxes (np.ndarray): Corners of current boxes.\n        qboxes (np.ndarray): Boxes to be avoid colliding.\n        clockwise (bool, optional): Whether the corners are in\n            clockwise order. Default: True.\n    \"\"\"\n    N = boxes.shape[0]\n    K = qboxes.shape[0]\n    ret = np.zeros((N, K), dtype=np.bool_)\n    slices = np.array([1, 2, 3, 0])\n    lines_boxes = np.stack((boxes, boxes[:, slices, :]),\n                           axis=2)  # [N, 4, 2(line), 2(xy)]\n    lines_qboxes = np.stack((qboxes, qboxes[:, slices, :]), axis=2)\n    # vec = np.zeros((2,), dtype=boxes.dtype)\n    boxes_standup = box_np_ops.corner_to_standup_nd_jit(boxes)\n    qboxes_standup = box_np_ops.corner_to_standup_nd_jit(qboxes)\n    for i in range(N):\n        for j in range(K):\n            # calculate standup first\n            iw = (\n                min(boxes_standup[i, 2], qboxes_standup[j, 2]) -\n                max(boxes_standup[i, 0], qboxes_standup[j, 0]))\n            if iw > 0:\n                ih = (\n                    min(boxes_standup[i, 3], qboxes_standup[j, 3]) -\n                    max(boxes_standup[i, 1], qboxes_standup[j, 1]))\n                if ih > 0:\n                    for k in range(4):\n                        for box_l in range(4):\n                            A = lines_boxes[i, k, 0]\n                            B = lines_boxes[i, k, 1]\n                            C = lines_qboxes[j, box_l, 0]\n                            D = lines_qboxes[j, box_l, 1]\n                            acd = (D[1] - A[1]) * (C[0] -\n                                                   A[0]) > (C[1] - A[1]) * (\n                                                       D[0] - A[0])\n                            bcd = (D[1] - B[1]) * (C[0] -\n                                                   B[0]) > (C[1] - B[1]) * (\n                                                       D[0] - B[0])\n                            if acd != bcd:\n                                abc = (C[1] - A[1]) * (B[0] - A[0]) > (\n                                    B[1] - A[1]) * (\n                                        C[0] - A[0])\n                                abd = (D[1] - A[1]) * (B[0] - A[0]) > (\n                                    B[1] - A[1]) * (\n                                        D[0] - A[0])\n                                if abc != abd:\n                                    ret[i, j] = True  # collision.\n                                    break\n                        if ret[i, j] is True:\n                            break\n                    if ret[i, j] is False:\n                        # now check 
complete overlap.\n                        # box overlap qbox:\n                        box_overlap_qbox = True\n                        for box_l in range(4):  # point l in qboxes\n                            for k in range(4):  # corner k in boxes\n                                vec = boxes[i, k] - boxes[i, (k + 1) % 4]\n                                if clockwise:\n                                    vec = -vec\n                                cross = vec[1] * (\n                                    boxes[i, k, 0] - qboxes[j, box_l, 0])\n                                cross -= vec[0] * (\n                                    boxes[i, k, 1] - qboxes[j, box_l, 1])\n                                if cross >= 0:\n                                    box_overlap_qbox = False\n                                    break\n                            if box_overlap_qbox is False:\n                                break\n\n                        if box_overlap_qbox is False:\n                            qbox_overlap_box = True\n                            for box_l in range(4):  # point box_l in boxes\n                                for k in range(4):  # corner k in qboxes\n                                    vec = qboxes[j, k] - qboxes[j, (k + 1) % 4]\n                                    if clockwise:\n                                        vec = -vec\n                                    cross = vec[1] * (\n                                        qboxes[j, k, 0] - boxes[i, box_l, 0])\n                                    cross -= vec[0] * (\n                                        qboxes[j, k, 1] - boxes[i, box_l, 1])\n                                    if cross >= 0:  #\n                                        qbox_overlap_box = False\n                                        break\n                                if qbox_overlap_box is False:\n                                    break\n                            if qbox_overlap_box:\n                                ret[i, j] = True  # collision.\n                        else:\n                            ret[i, j] = True  # collision.\n    return ret\n\n\n@numba.njit\ndef noise_per_box(boxes, valid_mask, loc_noises, rot_noises):\n    \"\"\"Add noise to every box (only on the horizontal plane).\n\n    Args:\n        boxes (np.ndarray): Input boxes with shape (N, 5).\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid\n            with shape (N).\n        loc_noises (np.ndarray): Location noises with shape (N, M, 3).\n        rot_noises (np.ndarray): Rotation noises with shape (N, M).\n\n    Returns:\n        np.ndarray: Mask to indicate whether the noise is\n            added successfully (pass the collision test).\n    \"\"\"\n    num_boxes = boxes.shape[0]\n    num_tests = loc_noises.shape[1]\n    box_corners = box_np_ops.box2d_to_corner_jit(boxes)\n    current_corners = np.zeros((4, 2), dtype=boxes.dtype)\n    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)\n    success_mask = -np.ones((num_boxes, ), dtype=np.int64)\n    # print(valid_mask)\n    for i in range(num_boxes):\n        if valid_mask[i]:\n            for j in range(num_tests):\n                current_corners[:] = box_corners[i]\n                current_corners -= boxes[i, :2]\n                _rotation_box2d_jit_(current_corners, rot_noises[i, j],\n                                     rot_mat_T)\n                current_corners += boxes[i, :2] + loc_noises[i, j, :2]\n                coll_mat = box_collision_test(\n                    
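# test the perturbed box (shape (1, 4, 2)) against the corners of all boxes;\n                    # the box's own entry is cleared just below so it is not counted.\n                    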
current_corners.reshape(1, 4, 2), box_corners)\n                coll_mat[0, i] = False\n                # print(coll_mat)\n                if not coll_mat.any():\n                    success_mask[i] = j\n                    box_corners[i] = current_corners\n                    break\n    return success_mask\n\n\n@numba.njit\ndef noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises,\n                      global_rot_noises):\n    \"\"\"Add noise to every box (only on the horizontal plane). Version 2 used\n    when enable global rotations.\n\n    Args:\n        boxes (np.ndarray): Input boxes with shape (N, 5).\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid\n            with shape (N).\n        loc_noises (np.ndarray): Location noises with shape (N, M, 3).\n        rot_noises (np.ndarray): Rotation noises with shape (N, M).\n\n    Returns:\n        np.ndarray: Mask to indicate whether the noise is\n            added successfully (pass the collision test).\n    \"\"\"\n    num_boxes = boxes.shape[0]\n    num_tests = loc_noises.shape[1]\n    box_corners = box_np_ops.box2d_to_corner_jit(boxes)\n    current_corners = np.zeros((4, 2), dtype=boxes.dtype)\n    current_box = np.zeros((1, 5), dtype=boxes.dtype)\n    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)\n    dst_pos = np.zeros((2, ), dtype=boxes.dtype)\n    success_mask = -np.ones((num_boxes, ), dtype=np.int64)\n    corners_norm = np.zeros((4, 2), dtype=boxes.dtype)\n    corners_norm[1, 1] = 1.0\n    corners_norm[2] = 1.0\n    corners_norm[3, 0] = 1.0\n    corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)\n    corners_norm = corners_norm.reshape(4, 2)\n    for i in range(num_boxes):\n        if valid_mask[i]:\n            for j in range(num_tests):\n                current_box[0, :] = boxes[i]\n                current_radius = np.sqrt(boxes[i, 0]**2 + boxes[i, 1]**2)\n                current_grot = np.arctan2(boxes[i, 0], boxes[i, 1])\n                dst_grot = current_grot + global_rot_noises[i, j]\n                dst_pos[0] = current_radius * np.sin(dst_grot)\n                dst_pos[1] = current_radius * np.cos(dst_grot)\n                current_box[0, :2] = dst_pos\n                current_box[0, -1] += (dst_grot - current_grot)\n\n                rot_sin = np.sin(current_box[0, -1])\n                rot_cos = np.cos(current_box[0, -1])\n                rot_mat_T[0, 0] = rot_cos\n                rot_mat_T[0, 1] = rot_sin\n                rot_mat_T[1, 0] = -rot_sin\n                rot_mat_T[1, 1] = rot_cos\n                current_corners[:] = current_box[\n                    0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2]\n                current_corners -= current_box[0, :2]\n                _rotation_box2d_jit_(current_corners, rot_noises[i, j],\n                                     rot_mat_T)\n                current_corners += current_box[0, :2] + loc_noises[i, j, :2]\n                coll_mat = box_collision_test(\n                    current_corners.reshape(1, 4, 2), box_corners)\n                coll_mat[0, i] = False\n                if not coll_mat.any():\n                    success_mask[i] = j\n                    box_corners[i] = current_corners\n                    loc_noises[i, j, :2] += (dst_pos - boxes[i, :2])\n                    rot_noises[i, j] += (dst_grot - current_grot)\n                    break\n    return success_mask\n\n\ndef _select_transform(transform, indices):\n    \"\"\"Select transform.\n\n    Args:\n        transform (np.ndarray): Transforms to select 
from.\n        indices (np.ndarray): Mask to indicate which transform to select.\n\n    Returns:\n        np.ndarray: Selected transforms.\n    \"\"\"\n    result = np.zeros((transform.shape[0], *transform.shape[2:]),\n                      dtype=transform.dtype)\n    for i in range(transform.shape[0]):\n        if indices[i] != -1:\n            result[i] = transform[i, indices[i]]\n    return result\n\n\n@numba.njit\ndef _rotation_matrix_3d_(rot_mat_T, angle, axis):\n    \"\"\"Get the 3D rotation matrix.\n\n    Args:\n        rot_mat_T (np.ndarray): Transposed rotation matrix.\n        angle (float): Rotation angle.\n        axis (int): Rotation axis.\n    \"\"\"\n    rot_sin = np.sin(angle)\n    rot_cos = np.cos(angle)\n    rot_mat_T[:] = np.eye(3)\n    if axis == 1:\n        rot_mat_T[0, 0] = rot_cos\n        rot_mat_T[0, 2] = rot_sin\n        rot_mat_T[2, 0] = -rot_sin\n        rot_mat_T[2, 2] = rot_cos\n    elif axis == 2 or axis == -1:\n        rot_mat_T[0, 0] = rot_cos\n        rot_mat_T[0, 1] = rot_sin\n        rot_mat_T[1, 0] = -rot_sin\n        rot_mat_T[1, 1] = rot_cos\n    elif axis == 0:\n        rot_mat_T[1, 1] = rot_cos\n        rot_mat_T[1, 2] = rot_sin\n        rot_mat_T[2, 1] = -rot_sin\n        rot_mat_T[2, 2] = rot_cos\n\n\n@numba.njit\ndef points_transform_(points, centers, point_masks, loc_transform,\n                      rot_transform, valid_mask):\n    \"\"\"Apply transforms to points and box centers.\n\n    Args:\n        points (np.ndarray): Input points.\n        centers (np.ndarray): Input box centers.\n        point_masks (np.ndarray): Mask to indicate which points need\n            to be transformed.\n        loc_transform (np.ndarray): Location transform to be applied.\n        rot_transform (np.ndarray): Rotation transform to be applied.\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid.\n    \"\"\"\n    num_box = centers.shape[0]\n    num_points = points.shape[0]\n    rot_mat_T = np.zeros((num_box, 3, 3), dtype=points.dtype)\n    for i in range(num_box):\n        _rotation_matrix_3d_(rot_mat_T[i], rot_transform[i], 2)\n    for i in range(num_points):\n        for j in range(num_box):\n            if valid_mask[j]:\n                if point_masks[i, j] == 1:\n                    points[i, :3] -= centers[j, :3]\n                    points[i:i + 1, :3] = points[i:i + 1, :3] @ rot_mat_T[j]\n                    points[i, :3] += centers[j, :3]\n                    points[i, :3] += loc_transform[j]\n                    break  # only apply first box's transform\n\n\n@numba.njit\ndef box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):\n    \"\"\"Transform 3D boxes.\n\n    Args:\n        boxes (np.ndarray): 3D boxes to be transformed.\n        loc_transform (np.ndarray): Location transform to be applied.\n        rot_transform (np.ndarray): Rotation transform to be applied.\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid.\n    \"\"\"\n    num_box = boxes.shape[0]\n    for i in range(num_box):\n        if valid_mask[i]:\n            boxes[i, :3] += loc_transform[i]\n            boxes[i, 6] += rot_transform[i]\n\n\ndef noise_per_object_v3_(gt_boxes,\n                         points=None,\n                         valid_mask=None,\n                         rotation_perturb=np.pi / 4,\n                         center_noise_std=1.0,\n                         global_random_rot_range=np.pi / 4,\n                         num_try=100):\n    \"\"\"Random rotate or remove each groundtruth independently. 
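Each valid box is\n    perturbed with up to ``num_try`` sampled location/rotation noises, and the\n    first noise that passes the collision test against all other boxes is\n    applied to both the box and the points inside it.\n    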
use kitti viewer\n    to test this function points_transform_\n\n    Args:\n        gt_boxes (np.ndarray): Ground truth boxes with shape (N, 7).\n        points (np.ndarray, optional): Input point cloud with\n            shape (M, 4). Default: None.\n        valid_mask (np.ndarray, optional): Mask to indicate which\n            boxes are valid. Default: None.\n        rotation_perturb (float, optional): Rotation perturbation.\n            Default: pi / 4.\n        center_noise_std (float, optional): Center noise standard deviation.\n            Default: 1.0.\n        global_random_rot_range (float, optional): Global random rotation\n            range. Default: pi/4.\n        num_try (int, optional): Number of try. Default: 100.\n    \"\"\"\n    num_boxes = gt_boxes.shape[0]\n    if not isinstance(rotation_perturb, (list, tuple, np.ndarray)):\n        rotation_perturb = [-rotation_perturb, rotation_perturb]\n    if not isinstance(global_random_rot_range, (list, tuple, np.ndarray)):\n        global_random_rot_range = [\n            -global_random_rot_range, global_random_rot_range\n        ]\n    enable_grot = np.abs(global_random_rot_range[0] -\n                         global_random_rot_range[1]) >= 1e-3\n\n    if not isinstance(center_noise_std, (list, tuple, np.ndarray)):\n        center_noise_std = [\n            center_noise_std, center_noise_std, center_noise_std\n        ]\n    if valid_mask is None:\n        valid_mask = np.ones((num_boxes, ), dtype=np.bool_)\n    center_noise_std = np.array(center_noise_std, dtype=gt_boxes.dtype)\n\n    loc_noises = np.random.normal(\n        scale=center_noise_std, size=[num_boxes, num_try, 3])\n    rot_noises = np.random.uniform(\n        rotation_perturb[0], rotation_perturb[1], size=[num_boxes, num_try])\n    gt_grots = np.arctan2(gt_boxes[:, 0], gt_boxes[:, 1])\n    grot_lowers = global_random_rot_range[0] - gt_grots\n    grot_uppers = global_random_rot_range[1] - gt_grots\n    global_rot_noises = np.random.uniform(\n        grot_lowers[..., np.newaxis],\n        grot_uppers[..., np.newaxis],\n        size=[num_boxes, num_try])\n\n    origin = (0.5, 0.5, 0)\n    gt_box_corners = box_np_ops.center_to_corner_box3d(\n        gt_boxes[:, :3],\n        gt_boxes[:, 3:6],\n        gt_boxes[:, 6],\n        origin=origin,\n        axis=2)\n\n    # TODO: rewrite this noise box function?\n    if not enable_grot:\n        selected_noise = noise_per_box(gt_boxes[:, [0, 1, 3, 4, 6]],\n                                       valid_mask, loc_noises, rot_noises)\n    else:\n        selected_noise = noise_per_box_v2_(gt_boxes[:, [0, 1, 3, 4, 6]],\n                                           valid_mask, loc_noises, rot_noises,\n                                           global_rot_noises)\n\n    loc_transforms = _select_transform(loc_noises, selected_noise)\n    rot_transforms = _select_transform(rot_noises, selected_noise)\n    surfaces = box_np_ops.corner_to_surfaces_3d_jit(gt_box_corners)\n    if points is not None:\n        # TODO: replace this points_in_convex function by my tools?\n        point_masks = box_np_ops.points_in_convex_polygon_3d_jit(\n            points[:, :3], surfaces)\n        points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms,\n                          rot_transforms, valid_mask)\n\n    box3d_transform_(gt_boxes, loc_transforms, rot_transforms, valid_mask)\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/dbsampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport os\nimport warnings\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet3d.core.bbox import box_np_ops\nfrom mmdet3d.datasets.pipelines import data_augment_utils\nfrom ..builder import OBJECTSAMPLERS, PIPELINES\n\n\nclass BatchSampler:\n    \"\"\"Class for sampling specific category of ground truths.\n\n    Args:\n        sample_list (list[dict]): List of samples.\n        name (str, optional): The category of samples. Default: None.\n        epoch (int, optional): Sampling epoch. Default: None.\n        shuffle (bool, optional): Whether to shuffle indices. Default: False.\n        drop_reminder (bool, optional): Drop reminder. Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 sampled_list,\n                 name=None,\n                 epoch=None,\n                 shuffle=True,\n                 drop_reminder=False):\n        self._sampled_list = sampled_list\n        self._indices = np.arange(len(sampled_list))\n        if shuffle:\n            np.random.shuffle(self._indices)\n        self._idx = 0\n        self._example_num = len(sampled_list)\n        self._name = name\n        self._shuffle = shuffle\n        self._epoch = epoch\n        self._epoch_counter = 0\n        self._drop_reminder = drop_reminder\n\n    def _sample(self, num):\n        \"\"\"Sample specific number of ground truths and return indices.\n\n        Args:\n            num (int): Sampled number.\n\n        Returns:\n            list[int]: Indices of sampled ground truths.\n        \"\"\"\n        if self._idx + num >= self._example_num:\n            ret = self._indices[self._idx:].copy()\n            self._reset()\n        else:\n            ret = self._indices[self._idx:self._idx + num]\n            self._idx += num\n        return ret\n\n    def _reset(self):\n        \"\"\"Reset the index of batchsampler to zero.\"\"\"\n        assert self._name is not None\n        # print(\"reset\", self._name)\n        if self._shuffle:\n            np.random.shuffle(self._indices)\n        self._idx = 0\n\n    def sample(self, num):\n        \"\"\"Sample specific number of ground truths.\n\n        Args:\n            num (int): Sampled number.\n\n        Returns:\n            list[dict]: Sampled ground truths.\n        \"\"\"\n        indices = self._sample(num)\n        return [self._sampled_list[i] for i in indices]\n\n\n@OBJECTSAMPLERS.register_module()\nclass DataBaseSampler(object):\n    \"\"\"Class for sampling data from the ground truth database.\n\n    Args:\n        info_path (str): Path of groundtruth database info.\n        data_root (str): Path of groundtruth database.\n        rate (float): Rate of actual sampled over maximum sampled number.\n        prepare (dict): Name of preparation functions and the input value.\n        sample_groups (dict): Sampled classes and numbers.\n        classes (list[str], optional): List of classes. Default: None.\n        bbox_code_size (int, optional): The number of bbox dimensions.\n            Default: None.\n        points_loader(dict, optional): Config of points loader. 
Default:\n            dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])\n    \"\"\"\n\n    def __init__(self,\n                 info_path,\n                 data_root,\n                 rate,\n                 prepare,\n                 sample_groups,\n                 classes=None,\n                 bbox_code_size=None,\n                 points_loader=dict(\n                     type='LoadPointsFromFile',\n                     coord_type='LIDAR',\n                     load_dim=4,\n                     use_dim=[0, 1, 2, 3]),\n                 file_client_args=dict(backend='disk')):\n        super().__init__()\n\n        self.data_root = data_root\n        self.info_path = info_path\n        self.rate = rate\n        self.prepare = prepare\n        self.classes = classes\n        self.cat2label = {name: i for i, name in enumerate(classes)}\n        self.label2cat = {i: name for i, name in enumerate(classes)}\n        self.points_loader = mmcv.build_from_cfg(points_loader, PIPELINES)\n        self.file_client = mmcv.FileClient(**file_client_args)\n\n        # load data base infos\n        if hasattr(self.file_client, 'get_local_path'):\n            with self.file_client.get_local_path(info_path) as local_path:\n                # loading data from a file-like object needs file format\n                db_infos = mmcv.load(open(local_path, 'rb'), file_format='pkl')\n        else:\n            warnings.warn(\n                'The used MMCV version does not have get_local_path. '\n                f'We treat the {info_path} as local paths and it '\n                'might cause errors if the path is not a local path. '\n                'Please use MMCV>= 1.3.16 if you meet errors.')\n            db_infos = mmcv.load(info_path)\n\n        # filter database infos\n        from mmdet3d.utils import get_root_logger\n        logger = get_root_logger()\n        for k, v in db_infos.items():\n            logger.info(f'load {len(v)} {k} database infos')\n        for prep_func, val in prepare.items():\n            db_infos = getattr(self, prep_func)(db_infos, val)\n        logger.info('After filter database:')\n        for k, v in db_infos.items():\n            logger.info(f'load {len(v)} {k} database infos')\n\n        self.db_infos = db_infos\n\n        self.bbox_code_size = bbox_code_size\n        if bbox_code_size is not None:\n            for k, info_cls in self.db_infos.items():\n                for info in info_cls:\n                    info['box3d_lidar'] = info['box3d_lidar'][:self.\n                                                              bbox_code_size]\n\n        # load sample groups\n        # TODO: more elegant way to load sample groups\n        self.sample_groups = []\n        for name, num in sample_groups.items():\n            self.sample_groups.append({name: int(num)})\n\n        self.group_db_infos = self.db_infos  # just use db_infos\n\n        self.sample_classes = []\n        self.sample_max_nums = []\n        for group_info in self.sample_groups:\n            self.sample_classes += list(group_info.keys())\n            self.sample_max_nums += list(group_info.values())\n\n        self.sampler_dict = {}\n        for k, v in self.group_db_infos.items():\n            self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)\n        # TODO: No group_sampling currently\n\n    @staticmethod\n    def filter_by_difficulty(db_infos, removed_difficulty):\n        \"\"\"Filter ground truths by difficulties.\n\n        Args:\n            db_infos (dict): Info of groundtruth 
database.\n            removed_difficulty (list): Difficulties that are not qualified.\n\n        Returns:\n            dict: Info of database after filtering.\n        \"\"\"\n        \n        new_db_infos = {}\n        for key, dinfos in db_infos.items():\n            \n            new_db_infos[key] = [\n                info for info in dinfos\n                if info.get('difficulty', 0) not in removed_difficulty\n            ]\n        return new_db_infos\n\n    @staticmethod\n    def filter_by_min_points(db_infos, min_gt_points_dict):\n        \"\"\"Filter ground truths by number of points in the bbox.\n\n        Args:\n            db_infos (dict): Info of groundtruth database.\n            min_gt_points_dict (dict): Different number of minimum points\n                needed for different categories of ground truths.\n\n        Returns:\n            dict: Info of database after filtering.\n        \"\"\"\n        for name, min_num in min_gt_points_dict.items():\n            min_num = int(min_num)\n            if min_num > 0:\n                filtered_infos = []\n                for info in db_infos[name]:\n                    if info['num_points_in_gt'] >= min_num:\n                        filtered_infos.append(info)\n                db_infos[name] = filtered_infos\n        return db_infos\n\n    def sample_all(self, gt_bboxes, gt_labels, img=None, ground_plane=None):\n        \"\"\"Sampling all categories of bboxes.\n\n        Args:\n            gt_bboxes (np.ndarray): Ground truth bounding boxes.\n            gt_labels (np.ndarray): Ground truth labels of boxes.\n\n        Returns:\n            dict: Dict of sampled 'pseudo ground truths'.\n\n                - gt_labels_3d (np.ndarray): ground truths labels\n                    of sampled objects.\n                - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`):\n                    sampled ground truth 3D bounding boxes\n                - points (np.ndarray): sampled points\n                - group_ids (np.ndarray): ids of sampled ground truths\n        \"\"\"\n        sampled_num_dict = {}\n        sample_num_per_class = []\n        for class_name, max_sample_num in zip(self.sample_classes,\n                                              self.sample_max_nums):\n            class_label = self.cat2label[class_name]\n            # sampled_num = int(max_sample_num -\n            #                   np.sum([n == class_name for n in gt_names]))\n            sampled_num = int(max_sample_num -\n                              np.sum([n == class_label for n in gt_labels]))\n            sampled_num = np.round(self.rate * sampled_num).astype(np.int64)\n            sampled_num_dict[class_name] = sampled_num\n            sample_num_per_class.append(sampled_num)\n\n        sampled = []\n        sampled_gt_bboxes = []\n        avoid_coll_boxes = gt_bboxes\n\n        for class_name, sampled_num in zip(self.sample_classes,\n                                           sample_num_per_class):\n            if sampled_num > 0:\n                sampled_cls = self.sample_class_v2(class_name, sampled_num,\n                                                   avoid_coll_boxes)\n\n                sampled += sampled_cls\n                if len(sampled_cls) > 0:\n                    if len(sampled_cls) == 1:\n                        sampled_gt_box = sampled_cls[0]['box3d_lidar'][\n                            np.newaxis, ...]\n                    else:\n                        sampled_gt_box = np.stack(\n                            [s['box3d_lidar'] for s in 
sampled_cls], axis=0)\n\n                    sampled_gt_bboxes += [sampled_gt_box]\n                    avoid_coll_boxes = np.concatenate(\n                        [avoid_coll_boxes, sampled_gt_box], axis=0)\n\n        ret = None\n        if len(sampled) > 0:\n            sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0)\n            # center = sampled_gt_bboxes[:, 0:3]\n\n            # num_sampled = len(sampled)\n            s_points_list = []\n            count = 0\n            for info in sampled:\n                file_path = os.path.join(\n                    self.data_root,\n                    info['path']) if self.data_root else info['path']\n                results = dict(pts_filename=file_path)\n                s_points = self.points_loader(results)['points']\n                s_points.translate(info['box3d_lidar'][:3])\n\n                count += 1\n\n                s_points_list.append(s_points)\n\n            gt_labels = np.array([self.cat2label[s['name']] for s in sampled],\n                                 dtype=np.long)\n\n            if ground_plane is not None:\n                xyz = sampled_gt_bboxes[:, :3]\n                dz = (ground_plane[:3][None, :] *\n                      xyz).sum(-1) + ground_plane[3]\n                sampled_gt_bboxes[:, 2] -= dz\n                for i, s_points in enumerate(s_points_list):\n                    s_points.tensor[:, 2].sub_(dz[i])\n\n            ret = {\n                'gt_labels_3d':\n                gt_labels,\n                'gt_bboxes_3d':\n                sampled_gt_bboxes,\n                'points':\n                s_points_list[0].cat(s_points_list),\n                'group_ids':\n                np.arange(gt_bboxes.shape[0],\n                          gt_bboxes.shape[0] + len(sampled))\n            }\n\n        return ret\n\n    def sample_class_v2(self, name, num, gt_bboxes):\n        \"\"\"Sampling specific categories of bounding boxes.\n\n        Args:\n            name (str): Class of objects to be sampled.\n            num (int): Number of sampled bboxes.\n            gt_bboxes (np.ndarray): Ground truth boxes.\n\n        Returns:\n            list[dict]: Valid samples after collision test.\n        \"\"\"\n        sampled = self.sampler_dict[name].sample(num)\n        sampled = copy.deepcopy(sampled)\n        num_gt = gt_bboxes.shape[0]\n        num_sampled = len(sampled)\n        gt_bboxes_bv = box_np_ops.center_to_corner_box2d(\n            gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])\n\n        sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)\n        boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()\n\n        sp_boxes_new = boxes[gt_bboxes.shape[0]:]\n        sp_boxes_bv = box_np_ops.center_to_corner_box2d(\n            sp_boxes_new[:, 0:2], sp_boxes_new[:, 3:5], sp_boxes_new[:, 6])\n\n        total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)\n        coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)\n        diag = np.arange(total_bv.shape[0])\n        coll_mat[diag, diag] = False\n\n        valid_samples = []\n        for i in range(num_gt, num_gt + num_sampled):\n            if coll_mat[i].any():\n                coll_mat[i] = False\n                coll_mat[:, i] = False\n            else:\n                valid_samples.append(sampled[i - num_gt])\n        return valid_samples\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/formating.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nfrom mmcv.parallel import DataContainer as DC\n\nfrom mmdet3d.core.bbox import BaseInstance3DBoxes\nfrom mmdet3d.core.points import BasePoints\nfrom mmdet.datasets.pipelines import to_tensor\nfrom ..builder import PIPELINES\n\n\n@PIPELINES.register_module()\nclass DefaultFormatBundle(object):\n    \"\"\"Default formatting bundle.\n\n    It simplifies the pipeline of formatting common fields, including \"img\",\n    \"proposals\", \"gt_bboxes\", \"gt_labels\", \"gt_masks\" and \"gt_semantic_seg\".\n    These fields are formatted as follows.\n\n    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)\n    - proposals: (1)to tensor, (2)to DataContainer\n    - gt_bboxes: (1)to tensor, (2)to DataContainer\n    - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer\n    - gt_labels: (1)to tensor, (2)to DataContainer\n    - gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)\n    - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,\n                       (3)to DataContainer (stack=True)\n    \"\"\"\n\n    def __init__(self, ):\n        return\n\n    def __call__(self, results):\n        \"\"\"Call function to transform and format common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to convert.\n\n        Returns:\n            dict: The result dict contains the data that is formatted with\n                default bundle.\n        \"\"\"\n        if 'img' in results:\n            if isinstance(results['img'], list):\n                # process multiple imgs in single frame\n                imgs = [img.transpose(2, 0, 1) for img in results['img']]\n                imgs = np.ascontiguousarray(np.stack(imgs, axis=0))\n                results['img'] = DC(to_tensor(imgs), stack=True)\n            else:\n                img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))\n                results['img'] = DC(to_tensor(img), stack=True)\n        for key in [\n                'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',\n                'gt_labels_3d', 'attr_labels', 'pts_instance_mask',\n                'pts_semantic_mask', 'depths'\n        ]:\n            if key not in results:\n                continue\n            if isinstance(results[key], list):\n                results[key] = DC([to_tensor(res) for res in results[key]])\n            else:\n                results[key] = DC(to_tensor(results[key]))\n        if 'gt_bboxes_3d' in results:\n            if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):\n                results['gt_bboxes_3d'] = DC(\n                    results['gt_bboxes_3d'], cpu_only=True)\n            else:\n                results['gt_bboxes_3d'] = DC(\n                    to_tensor(results['gt_bboxes_3d']))\n\n        for key in ['centers2d', 'depths2d', 'gt_labels_2d', 'gt_bboxes_2d']:\n            if key in results:\n                results[key] = DC(results[key], cpu_only=True, stack=False)\n        \n        for key in ['gt_ego_lcf_feat', 'gt_ego_fut_trajs', 'gt_ego_his_trajs', 'gt_ego_fut_cmd', 'gt_ego_fut_masks', 'vad_ego_fut_trajs']:\n            if key in results:\n                results[key] = DC(results[key], stack=False)\n        \n        for key in ['gt_agent_fut_traj', 'gt_agent_fut_traj_mask', 'gt_agent_fut_abs_traj']:\n            if key in results:\n                results[key] = DC(results[key], cpu_only=False, stack=False)\n\n        if 'gt_masks' in results:\n            
results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)\n\n        if 'gt_semantic_seg' in results:\n            results['gt_semantic_seg'] = DC(\n                to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)\n    \n        if 'can_bus_info' in results:\n            results['can_bus_info'] = DC(\n                to_tensor(results['can_bus_info'][None, ...]), stack=False)\n\n        if 'gt_fut_segmentations' in results:\n            results['gt_fut_segmentations'] = DC(\n                to_tensor(results['gt_fut_segmentations']), stack=True)\n            results['gt_fut_segmentations_plus'] = DC(\n                to_tensor(results['gt_fut_segmentations_plus']), stack=True)\n        if 'fut_boxes_in_cur_ego_list' in results:\n            results['fut_boxes_in_cur_ego_list'] = DC(\n                    results['fut_boxes_in_cur_ego_list'], cpu_only=True, stack=False)\n        return results\n\n    def __repr__(self):\n        return self.__class__.__name__\n\n\n@PIPELINES.register_module()\nclass Collect3D(object):\n    \"\"\"Collect data from the loader relevant to the specific task.\n\n    This is usually the last stage of the data loader pipeline. Typically keys\n    is set to some subset of \"img\", \"proposals\", \"gt_bboxes\",\n    \"gt_bboxes_ignore\", \"gt_labels\", and/or \"gt_masks\".\n\n    The \"img_meta\" item is always populated.  The contents of the \"img_meta\"\n    dictionary depends on \"meta_keys\". By default this includes:\n\n        - 'img_shape': shape of the image input to the network as a tuple\n            (h, w, c).  Note that images may be zero padded on the\n            bottom/right if the batch tensor is larger than this shape.\n        - 'scale_factor': a float indicating the preprocessing scale\n        - 'flip': a boolean indicating if image flip transform was used\n        - 'filename': path to the image file\n        - 'ori_shape': original shape of the image as a tuple (h, w, c)\n        - 'pad_shape': image shape after padding\n        - 'lidar2img': transform from lidar to image\n        - 'depth2img': transform from depth to image\n        - 'cam2img': transform from camera to image\n        - 'pcd_horizontal_flip': a boolean indicating if point cloud is\n            flipped horizontally\n        - 'pcd_vertical_flip': a boolean indicating if point cloud is\n            flipped vertically\n        - 'box_mode_3d': 3D box mode\n        - 'box_type_3d': 3D box type\n        - 'img_norm_cfg': a dict of normalization information:\n            - mean: per channel mean subtraction\n            - std: per channel std divisor\n            - to_rgb: bool indicating if bgr was converted to rgb\n        - 'pcd_trans': point cloud transformations\n        - 'sample_idx': sample index\n        - 'pcd_scale_factor': point cloud scale factor\n        - 'pcd_rotation': rotation applied to point cloud\n        - 'pts_filename': path to point cloud file.\n\n    Args:\n        keys (Sequence[str]): Keys of results to be collected in ``data``.\n        meta_keys (Sequence[str], optional): Meta keys to be converted to\n            ``mmcv.DataContainer`` and collected in ``data[img_metas]``.\n            Default: ('filename', 'ori_shape', 'img_shape', 'lidar2img',\n            'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip',\n            'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d',\n            'box_type_3d', 'img_norm_cfg', 'pcd_trans',\n            'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename')\n    \"\"\"\n\n 
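   # Illustrative pipeline entry (exact keys/meta_keys depend on the config):\n    #   dict(type='Collect3D', keys=['img', 'gt_bboxes_3d', 'gt_labels_3d'])\n 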
   def __init__(\n        self,\n        keys,\n        meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',\n                   'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip',\n                   'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d',\n                   'box_type_3d', 'img_norm_cfg', 'pcd_trans', 'sample_idx',\n                   'pcd_scale_factor', 'pcd_rotation', 'pcd_rotation_angle',\n                   'pts_filename', 'transformation_3d_flow', 'trans_mat', 'index',\n                   'sequence_group_idx', 'curr_to_prev_lidar_rt', 'curr_to_prev_ego_rt',\n                    'start_of_sequence', 'index', 'global_to_curr_lidar_rt', 'tta_config', 'input_size',\n                    'prev_lidar_to_global_rt', 'sample_index', 'scene_name', 'curr', 'nuscenes_get_rt_matrix', 'aux_cam_params',\n                   'affine_aug', 'ego_pose_inv', 'ego_pose', 'timestamp', 'has_valid_map', 'instance_inds')):\n        self.keys = keys\n        self.meta_keys = meta_keys\n\n    def __call__(self, results):\n        \"\"\"Call function to collect keys in results. The keys in ``meta_keys``\n        will be converted to :obj:`mmcv.DataContainer`.\n\n        Args:\n            results (dict): Result dict contains the data to collect.\n\n        Returns:\n            dict: The result dict contains the following keys\n                - keys in ``self.keys``\n                - ``img_metas``\n        \"\"\"\n        data = {}\n        img_metas = {}\n\n        for key in self.meta_keys:\n            if key in results:\n                img_metas[key] = results[key]\n\n        data['img_metas'] = DC(img_metas, cpu_only=True)\n        for key in self.keys:\n            data[key] = results[key]\n        return data\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        return self.__class__.__name__ + \\\n            f'(keys={self.keys}, meta_keys={self.meta_keys})'\n\n\n@PIPELINES.register_module()\nclass DefaultFormatBundle3D(DefaultFormatBundle):\n    \"\"\"Default formatting bundle.\n\n    It simplifies the pipeline of formatting common fields for voxels,\n    including \"proposals\", \"gt_bboxes\", \"gt_labels\", \"gt_masks\" and\n    \"gt_semantic_seg\".\n    These fields are formatted as follows.\n\n    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)\n    - proposals: (1)to tensor, (2)to DataContainer\n    - gt_bboxes: (1)to tensor, (2)to DataContainer\n    - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer\n    - gt_labels: (1)to tensor, (2)to DataContainer\n    \"\"\"\n\n    def __init__(self, class_names, with_gt=True, with_label=True):\n        super(DefaultFormatBundle3D, self).__init__()\n        self.class_names = class_names\n        self.with_gt = with_gt\n        self.with_label = with_label\n\n    def __call__(self, results):\n        \"\"\"Call function to transform and format common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to convert.\n\n        Returns:\n            dict: The result dict contains the data that is formatted with\n                default bundle.\n        \"\"\"\n        # Format 3D data\n        if 'points' in results:\n            assert isinstance(results['points'], BasePoints)\n            results['points'] = DC(results['points'].tensor)\n\n        for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:\n            if key not in results:\n                continue\n            results[key] = 
DC(to_tensor(results[key]), stack=False)\n\n        if self.with_gt:\n            # Clean GT bboxes in the final\n            if 'gt_bboxes_3d_mask' in results:\n                gt_bboxes_3d_mask = results['gt_bboxes_3d_mask']\n                results['gt_bboxes_3d'] = results['gt_bboxes_3d'][\n                    gt_bboxes_3d_mask]\n                if 'gt_names_3d' in results:\n                    results['gt_names_3d'] = results['gt_names_3d'][\n                        gt_bboxes_3d_mask]\n                if 'centers2d' in results:\n                    results['centers2d'] = results['centers2d'][\n                        gt_bboxes_3d_mask]\n                if 'depths' in results:\n                    results['depths'] = results['depths'][gt_bboxes_3d_mask]\n            if 'gt_bboxes_mask' in results:\n                gt_bboxes_mask = results['gt_bboxes_mask']\n                if 'gt_bboxes' in results:\n                    results['gt_bboxes'] = results['gt_bboxes'][gt_bboxes_mask]\n                results['gt_names'] = results['gt_names'][gt_bboxes_mask]\n            if self.with_label:\n                if 'gt_names' in results and len(results['gt_names']) == 0:\n                    results['gt_labels'] = np.array([], dtype=np.int64)\n                    results['attr_labels'] = np.array([], dtype=np.int64)\n                elif 'gt_names' in results and isinstance(\n                        results['gt_names'][0], list):\n                    # gt_labels might be a list of list in multi-view setting\n                    results['gt_labels'] = [\n                        np.array([self.class_names.index(n) for n in res],\n                                 dtype=np.int64) for res in results['gt_names']\n                    ]\n                elif 'gt_names' in results:\n                    results['gt_labels'] = np.array([\n                        self.class_names.index(n) for n in results['gt_names']\n                    ],\n                                                    dtype=np.int64)\n                # we still assume one pipeline for one frame LiDAR\n                # thus, the 3D name is list[string]\n                if 'gt_names_3d' in results:\n                    results['gt_labels_3d'] = np.array([\n                        self.class_names.index(n)\n                        for n in results['gt_names_3d']\n                    ],\n                                                       dtype=np.int64)\n        results = super(DefaultFormatBundle3D, self).__call__(results)\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(class_names={self.class_names}, '\n        repr_str += f'with_gt={self.with_gt}, with_label={self.with_label})'\n        return repr_str\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/loading.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\nfrom PIL import Image\nfrom pyquaternion import Quaternion\nimport os.path as osp\nfrom mmdet3d.core.points import BasePoints, get_points_type\nfrom mmdet.datasets.pipelines import LoadAnnotations, LoadImageFromFile\nfrom ...core.bbox import LiDARInstance3DBoxes\nfrom ..builder import PIPELINES\nfrom copy import deepcopy\nimport cv2\nimport os\nfrom torchvision.transforms.functional import rotate\nfrom mmdet3d.datasets.vector_map import VectorizedLocalMap, LiDARInstanceLines\nfrom nuscenes.eval.common.utils import quaternion_yaw\nfrom nuscenes.eval.common.utils import Quaternion as Quaternion_nus\n# from .vad_custom_nuscenes_eval import NuScenesEval_custom\nfrom nuscenes.eval.common.utils import center_distance\nfrom mmcv.parallel import DataContainer as DC\nimport random\nfrom mmdet3d.core import LiDARInstance3DBoxes\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\n# from projects.mmdet3d_plugin.core.bbox.structures.nuscenes_box import CustomNuscenesBox\nfrom shapely import affinity, ops\nfrom shapely.geometry import LineString, box, MultiPolygon, MultiLineString\nfrom mmdet.datasets.pipelines import to_tensor\nfrom nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer\nfrom nuscenes.eval.detection.constants import DETECTION_NAMES\nfrom mmcv.runner import get_dist_info\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\nimport pyquaternion\nimport torch.nn as nn\n\n@PIPELINES.register_module()\nclass LoadVectorMap(object):\n\n    def __init__(self, data_root, point_cloud_range, map_fixed_ptsnum_per_line=20, map_classes=['divider', 'ped_crossing', 'boundary'], **kwargs):\n        patch_h = point_cloud_range[4]-point_cloud_range[1]\n        patch_w = point_cloud_range[3]-point_cloud_range[0]\n        self.patch_size = (min(patch_h, 50), patch_w)\n        self.vector_map = VectorizedLocalMap(data_root,  patch_size=self.patch_size, map_classes=map_classes, \n                            fixed_ptsnum_per_line=map_fixed_ptsnum_per_line)\n\n\n    def vectormap_pipeline(self, location, ego2global_translation, patch_angle, flip_dx, flip_dy):\n        '''\n        `example` type: <class 'dict'>\n            keys: 'img_metas', 'gt_bboxes_3d', 'gt_labels_3d', 'img';\n                  all keys type is 'DataContainer';\n                  'img_metas' cpu_only=True, type is dict, others are false;\n                  'gt_labels_3d' shape torch.size([num_samples]), stack=False,\n                                padding_value=0, cpu_only=False\n                  'gt_bboxes_3d': stack=False, cpu_only=True\n        '''\n\n        anns_results = self.vector_map.gen_vectorized_samples(\n            location, ego2global_translation, patch_angle, flip_dx, flip_dy\n        )\n        has_valid_map = True\n        if len(anns_results['gt_vecs_label']) == 0:    \n            ## params that can generate non-empty anns\n            location = 'boston-seaport'\n            ego2global_translation = [1178.1282, 1140.1135, 0.0]\n            patch_angle = 143.6049566307475\n            flip_dx = False\n            flip_dy = False\n            ## \n            pseudo_anns_results = self.vector_map.gen_vectorized_samples(\n                location, ego2global_translation, patch_angle, flip_dx, flip_dy\n            )\n            anns_results = pseudo_anns_results\n            has_valid_map = False\n\n        \n\n        '''\n        anns_results, type: dict\n            
'gt_vecs_pts_loc': list[num_vecs], vec with num_points*2 coordinates\n            'gt_vecs_pts_num': list[num_vecs], vec with num_points\n            'gt_vecs_label': list[num_vecs], vec with cls index\n        '''\n        gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])\n        if isinstance(anns_results['gt_vecs_pts_loc'], LiDARInstanceLines):\n            gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']\n        else:\n            gt_vecs_pts_loc = to_tensor(anns_results['gt_vecs_pts_loc'])\n            try:\n                gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)\n            except:\n                assert False\n                # empty tensor, will be passed in train, \n                # but we preserve it for test\n                gt_vecs_pts_loc = gt_vecs_pts_loc\n\n        return dict(\n            map_gt_labels_3d = DC(gt_vecs_label, cpu_only=False),\n            map_gt_bboxes_3d = DC(gt_vecs_pts_loc, cpu_only=True),\n            has_valid_map = has_valid_map,\n        )\n\n    def __call__(self, results):\n\n        ego2global_translation = list(results['ori_ego_pose'][:3,3].numpy())\n        # ego2global_rotation = list(Quaternion_nus(matrix=ego2global.numpy(), rtol=eps, atol=eps).q)\n        v = np.dot( results['ori_ego_pose'][:3,:3].numpy(), np.array([1, 0, 0]))\n        yaw = np.arctan2(v[1], v[0])\n        ori_patch_angle = yaw / np.pi * 180\n\n        # v = np.dot(ego2global[:3,:3].numpy(), np.array([1, 0, 0]))\n        # yaw = np.arctan2(v[1], v[0])\n        # patch_angle2 = yaw / np.pi * 180\n\n        results.update(\n            self.vectormap_pipeline(results['curr']['map_location'], ego2global_translation, ori_patch_angle-results['rotate_bda'], results['flip_dx'], results['flip_dy'])\n        )\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadVectorMap2(object):\n\n    def __init__(self, data_root, point_cloud_range, map_fixed_ptsnum_per_line=20, map_classes=['divider', 'ped_crossing', 'boundary'], **kwargs):\n        patch_h = point_cloud_range[4]-point_cloud_range[1]\n        patch_w = point_cloud_range[3]-point_cloud_range[0]\n        self.point_cloud_range = torch.tensor(point_cloud_range)\n        self.patch_size = (min(patch_h, 50), patch_w)\n        self.vector_map = VectorizedLocalMap(data_root,  patch_size=self.patch_size, map_classes=map_classes, \n                            fixed_ptsnum_per_line=map_fixed_ptsnum_per_line)\n\n\n    def vectormap_pipeline(self, location, ego2global_translation, patch_angle, flip_dx, flip_dy):\n        '''\n        `example` type: <class 'dict'>\n            keys: 'img_metas', 'gt_bboxes_3d', 'gt_labels_3d', 'img';\n                  all keys type is 'DataContainer';\n                  'img_metas' cpu_only=True, type is dict, others are false;\n                  'gt_labels_3d' shape torch.size([num_samples]), stack=False,\n                                padding_value=0, cpu_only=False\n                  'gt_bboxes_3d': stack=False, cpu_only=True\n        '''\n\n        anns_results = self.vector_map.gen_vectorized_samples(\n            location, ego2global_translation, patch_angle, flip_dx, flip_dy\n        )\n        has_valid_map = True\n        if len(anns_results['gt_vecs_label']) == 0:    \n            ## params that can generate non-empty anns\n            location = 'boston-seaport'\n            ego2global_translation = [1178.1282, 1140.1135, 0.0]\n            patch_angle = 143.6049566307475\n            flip_dx = False\n            flip_dy = False\n         
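   # Fallback: re-query with parameters known to yield non-empty vectors so the\n            # sample is still usable; has_valid_map=False flags these as pseudo labels.\n         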
   ## \n            pseudo_anns_results = self.vector_map.gen_vectorized_samples(\n                location, ego2global_translation, patch_angle, flip_dx, flip_dy\n            )\n            anns_results = pseudo_anns_results\n            has_valid_map = False\n\n        \n\n        '''\n        anns_results, type: dict\n            'gt_vecs_pts_loc': list[num_vecs], vec with num_points*2 coordinates\n            'gt_vecs_pts_num': list[num_vecs], vec with num_points\n            'gt_vecs_label': list[num_vecs], vec with cls index\n        '''\n        gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])\n        if isinstance(anns_results['gt_vecs_pts_loc'], LiDARInstanceLines):\n            gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']\n        else:\n            gt_vecs_pts_loc = to_tensor(anns_results['gt_vecs_pts_loc'])\n            try:\n                gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)\n            except:\n                assert False\n                # empty tensor, will be passed in train, \n                # but we preserve it for test\n                gt_vecs_pts_loc = gt_vecs_pts_loc\n\n        gt_pts = gt_vecs_pts_loc.shift_fixed_num_sampled_points_v2\n        gt_pts = (gt_pts - self.point_cloud_range[:2])/(self.point_cloud_range[3:5]-self.point_cloud_range[:2])\n        return dict(\n            map_gt_labels_3d = DC(gt_vecs_label, cpu_only=False),\n            map_gt_bboxes_3d = DC(gt_pts, cpu_only=False),\n            has_valid_map = has_valid_map,\n        )\n\n    def __call__(self, results):\n\n        ego2global_translation = list(results['ori_ego_pose'][:3,3].numpy())\n        # ego2global_rotation = list(Quaternion_nus(matrix=ego2global.numpy(), rtol=eps, atol=eps).q)\n        v = np.dot( results['ori_ego_pose'][:3,:3].numpy(), np.array([1, 0, 0]))\n        yaw = np.arctan2(v[1], v[0])\n        ori_patch_angle = yaw / np.pi * 180\n\n        # v = np.dot(ego2global[:3,:3].numpy(), np.array([1, 0, 0]))\n        # yaw = np.arctan2(v[1], v[0])\n        # patch_angle2 = yaw / np.pi * 180\n\n        results.update(\n            self.vectormap_pipeline(results['curr']['map_location'], ego2global_translation, ori_patch_angle-results['rotate_bda'], results['flip_dx'], results['flip_dy'])\n        )\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadGTPlaner(object):\n    def __init__(self):\n        pass\n    \n    def __call__(self, results):\n\n        results['gt_ego_lcf_feat'] = to_tensor(results['curr']['gt_ego_lcf_feat'])\n        results['gt_ego_lcf_feat'][:2] = (results['bda_mat'][:2, :2] @ results['gt_ego_lcf_feat'][:2, None]).squeeze(-1)\n        results['gt_ego_fut_trajs'] = torch.cumsum(to_tensor(results['curr']['gt_ego_fut_trajs']), dim=0)[:6]\n        results['gt_ego_fut_trajs'] = (results['bda_mat'][:2,:2] @ results['gt_ego_fut_trajs'][..., None]).squeeze(-1)\n        results['gt_ego_his_trajs'] = -to_tensor(results['curr']['gt_ego_his_trajs'])\n        results['gt_ego_his_trajs'] = (results['bda_mat'][:2,:2] @ results['gt_ego_his_trajs'][..., None]).squeeze(-1)\n        if results['gt_ego_fut_trajs'][-1][1] >= 2:\n            command = np.array([1, 0, 0])  # Turn Right\n        elif results['gt_ego_fut_trajs'][-1][1] <= -2:\n            command = np.array([0, 1, 0])  # Turn Left\n        else:\n            command = np.array([0, 0, 1])  # Go Straight\n        \n        results['gt_ego_fut_cmd'] = to_tensor(command)\n        results['gt_ego_fut_masks'] = 
to_tensor(results['curr']['gt_ego_fut_masks'])[: 6]\n        return results\n\n\n\n@PIPELINES.register_module()\nclass LoadGTMotion(object):\n    def __init__(self, with_ego_as_agent=False):\n        self.with_ego_as_agent = with_ego_as_agent\n    \n    def __call__(self, results):\n\n        agent_fut_traj_mask = torch.tensor(np.array(results['curr']['ann_infos']['fut_traj_mask']), dtype=torch.float32)\n        agent_fut_traj = torch.tensor(np.array(results['curr']['ann_infos']['fut_traj']), dtype=torch.float32)\n        agent_fut_traj = torch.cat([agent_fut_traj, torch.ones_like(agent_fut_traj[..., 0:2])], dim=-1)\n        if len(agent_fut_traj)>0:\n            agent_fut_traj = (results['ego_pose_inv'] @ agent_fut_traj.unsqueeze(-1)).squeeze(-1)[..., :2] * agent_fut_traj_mask\n            \n        if self.with_ego_as_agent:\n            gt_ego_fut_trajs = torch.cumsum(to_tensor(results['curr']['gt_ego_fut_trajs']), dim=0)[: 6]\n            gt_ego_fut_trajs = torch.cat([gt_ego_fut_trajs, torch.zeros_like(gt_ego_fut_trajs[:2])])\n            agent_fut_traj = torch.cat([gt_ego_fut_trajs[None], agent_fut_traj], 0)\n            \n            gt_fut_traj_mask = torch.ones_like(gt_ego_fut_trajs)\n            gt_fut_traj_mask[-2:] = 0\n            agent_fut_traj_mask = torch.cat([gt_fut_traj_mask[None], agent_fut_traj_mask], 0)\n\n        centers = results['gt_bboxes_3d'].center[..., :2]\n        try:\n            tmp = torch.cat([centers[:, None], agent_fut_traj], 1)\n        except:\n            print(centers.shape, agent_fut_traj.shape, agent_fut_traj_mask.shape, results['gt_labels_3d'].shape)\n        agent_fut_traj = tmp[:, 1:] - tmp[:, :-1]\n        results['gt_agent_fut_traj_mask'] = agent_fut_traj_mask\n        results['gt_agent_fut_traj'] = agent_fut_traj\n\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadFutBoxInfo(object):\n    def __init__(self, add_boundary=True):\n        self.X_BOUND = [-50.0, 50.0, 0.1]  # Forward\n        self.Y_BOUND = [-50.0, 50.0, 0.1]  # Sides\n        self.Z_BOUND = [-10.0, 10.0, 20.0]  # Height\n        dx, bx, _ = self.gen_dx_bx(self.X_BOUND, self.Y_BOUND, self.Z_BOUND)\n        self.dx, self.bx = dx[:2], bx[:2]\n    \n        bev_resolution, bev_start_position, bev_dimension = self.calculate_birds_eye_view_parameters(\n            self.X_BOUND, self.Y_BOUND, self.Z_BOUND\n        )\n        self.bev_resolution = bev_resolution.numpy()\n        self.bev_start_position = bev_start_position.numpy()\n        self.bev_dimension = bev_dimension.numpy()\n        ego_width, ego_length = 1.85, 4.084\n        self.W = ego_width\n        self.H = ego_length\n\n        self.category_index = {\n            'human':[2,3,4,5,6,7,8],\n            'vehicle':[14,15,16,17,18,19,20,21,22,23]\n        }\n        self.add_boundary = add_boundary\n        # self.n_future = n_future\n\n        # self.add_state(\"obj_col\", default=torch.zeros(self.n_future), dist_reduce_fx=\"sum\")\n        # self.add_state(\"obj_box_col\", default=torch.zeros(self.n_future), dist_reduce_fx=\"sum\")\n        # self.add_state(\"L2\", default=torch.zeros(self.n_future),dist_reduce_fx=\"sum\")\n        # self.add_state(\"total\", default=torch.tensor(0), dist_reduce_fx=\"sum\")\n\n    def gen_dx_bx(self, xbound, ybound, zbound):\n        dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]])\n        bx = torch.Tensor([row[0] + row[2]/2.0 for row in [xbound, ybound, zbound]])\n        nx = torch.LongTensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, 
zbound]])\n\n        return dx, bx, nx\n    \n    def calculate_birds_eye_view_parameters(self, x_bounds, y_bounds, z_bounds):\n        \"\"\"\n        Parameters\n        ----------\n            x_bounds: Forward direction in the ego-car.\n            y_bounds: Sides\n            z_bounds: Height\n\n        Returns\n        -------\n            bev_resolution: Bird's-eye view bev_resolution\n            bev_start_position Bird's-eye view first element\n            bev_dimension Bird's-eye view tensor spatial dimension\n        \"\"\"\n        bev_resolution = torch.tensor([row[2] for row in [x_bounds, y_bounds, z_bounds]])\n        bev_start_position = torch.tensor([row[0] + row[2] / 2.0 for row in [x_bounds, y_bounds, z_bounds]])\n        bev_dimension = torch.tensor([(row[1] - row[0]) / row[2] for row in [x_bounds, y_bounds, z_bounds]],\n                                    dtype=torch.long)\n\n        return bev_resolution, bev_start_position, bev_dimension\n    \n    def get_label(\n            self,\n            boxes_in_cur_ego_list,\n            labels_in_cur_ego_list\n        ):\n        segmentation_np, pedestrian_np = self.get_birds_eye_view_label(boxes_in_cur_ego_list, labels_in_cur_ego_list)\n        segmentation = torch.from_numpy(segmentation_np).long()\n        pedestrian = torch.from_numpy(pedestrian_np).long()\n\n\n        return segmentation, pedestrian\n\n    def world2bev_vis(self, x, y):\n            return int((x - self.bx[0].item()) / self.dx[0].item()), int((y - self.bx[1].item()) / self.dx[1].item())\n    \n    def get_birds_eye_view_label(self, boxes_in_cur_ego_list, labels_in_cur_ego_list):\n        T = 6\n        segmentation = np.zeros((T,self.bev_dimension[0], self.bev_dimension[1]))\n        pedestrian = np.zeros((T,self.bev_dimension[0], self.bev_dimension[1]))\n\n        for k, fut_boxes in enumerate(boxes_in_cur_ego_list):\n            if fut_boxes is None: continue\n            for i, corners in enumerate(fut_boxes.corners[:, [4, 7, 3, 0], :2]):\n                \n                # fitler vehicle\n                vehicle_classes = ['car', 'bus', 'construction_vehicle',\n                           'bicycle', 'motorcycle', 'truck', 'trailer']\n                if labels_in_cur_ego_list[k][i] not in  [0, 1, 2, 3, 4, 6, 7]: continue \n                corners = np.array([self.world2bev_vis(*corner) for corner in corners])\n                cv2.fillPoly(segmentation[k], [corners], 1.0)\n                \n        return segmentation, pedestrian\n\n    def __call__(self, results):\n\n        ego2global_rotation = results['nuscenes_get_rt_matrix']['ego2global_rotation']\n        ego2global_translation =results['nuscenes_get_rt_matrix'][\n            'ego2global_translation']\n        trans = -np.array(ego2global_translation)\n        rot = Quaternion(ego2global_rotation).inverse\n\n        boxes_in_cur_ego_list = []\n        for gt_boxes_each_frame in results['fut_boxes_info']:\n            boxes_in_cur_ego = []\n            if len(gt_boxes_each_frame)==0:\n                boxes_in_cur_ego_list.append(None)\n                continue\n            for box in gt_boxes_each_frame:\n                center = box[:3]\n                wlh = box[3:6]\n                box_yaw = box[6]\n                box_vel = box[7:].tolist()\n                box_vel.append(0)\n                quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw)\n                nusc_box = NuScenesBox(center, wlh, quat, velocity=box_vel)\n                nusc_box.translate(trans)\n                
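                # Note: the future-frame boxes are annotated in the global frame;
                # translating by -ego2global_translation (above) and then rotating by
                # the inverse ego2global rotation (below) expresses each box in the
                # current ego frame, matching the BEV grid defined by X_BOUND/Y_BOUND.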
nusc_box.rotate(rot)\n                box_xyz = np.array(nusc_box.center)\n                box_dxdydz = np.array(nusc_box.wlh)\n                box_yaw = np.array([nusc_box.orientation.yaw_pitch_roll[0]])\n                box_velo = np.array(nusc_box.velocity[:2])\n                gt_box = np.concatenate([box_xyz, box_dxdydz, box_yaw, box_velo])\n                boxes_in_cur_ego.append(gt_box)\n            boxes_in_cur_ego = torch.tensor(np.array(boxes_in_cur_ego))\n            boxes_in_cur_ego = LiDARInstance3DBoxes(boxes_in_cur_ego, box_dim=boxes_in_cur_ego.shape[-1],\n                                 origin=(0.5, 0.5, 0.5))\n\n            boxes_in_cur_ego_list.append(boxes_in_cur_ego)\n            \n        results['fut_boxes_in_cur_ego_list'] = boxes_in_cur_ego_list\n        segmentation, pedestrian = self.get_label(boxes_in_cur_ego_list, results['fut_labels_info'])\n\n        \n        segmentation_plus = segmentation.permute(1, 2, 0).cpu().clone().numpy()\n        segmentation_plus *= 0 # only consider boudnary, temporal\n        map_gt_bboxes_3d = results['map_gt_bboxes_3d'].data.fixed_num_sampled_points\n        map_gt_bboxes_3d= map_gt_bboxes_3d[ results['map_gt_labels_3d'].data==2]\n        map_gt_bboxes_3d = (map_gt_bboxes_3d - self.bx.cpu().numpy() ) / (self.dx.cpu().numpy())\n        a = segmentation_plus[:, :, :3].copy()\n        a = np.ascontiguousarray(a, dtype=np.uint8)\n        b = segmentation_plus[:, :, :3].copy()\n        b = np.ascontiguousarray(a, dtype=np.uint8)\n        for line in map_gt_bboxes_3d:\n            line = line.clip(0, 999).numpy().astype(np.int32)\n            for i, corner in enumerate(line[:-1]):\n                a = cv2.line(a, tuple(line[i]), tuple(line[i+1]), color=(1, 1, 1), thickness=1)\n                b = cv2.line(b, tuple(line[i]), tuple(line[i+1]), color=(1, 1, 1), thickness=1)   \n        segmentation_plus = torch.cat([torch.tensor(a), torch.tensor(b)], -1).permute(2, 0, 1)\n\n        results['gt_fut_segmentations'] = segmentation\n        results['gt_fut_segmentations_plus'] = segmentation_plus\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadSemanticImageMask(object):\n    def __init__(self, mask_file_path='./data/nus_sem'):\n        self.mask_file_path = mask_file_path\n    \n    def __call__(self, results):\n\n        masks = []\n        for cam in results['cam_names']:\n            data_token = results['curr']['cams'][cam]['sample_data_token']\n            filename = osp.join(self.mask_file_path, data_token+'.png')\n            img = Image.open(filename)\n            img_augs = results['img_augs'][cam]\n            resize, resize_dims, crop, flip, rotate = img_augs        \n            img = self.img_transform_core(img, resize_dims, crop, flip, rotate)\n            img = np.array(img)\n            masks.append(img)\n        masks = np.stack(masks, 0)\n        results['gt_img_sem_masks'] = to_tensor(masks)\n        return results\n        \n    \n    def img_transform_core(self, img, resize_dims, crop, flip, rotate):\n        # adjust image\n        img = img.resize(resize_dims, resample=0)\n        img = img.crop(crop)\n        if flip:\n            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)\n        img = img.rotate(rotate, resample=0, expand=0)\n        return img\n\n\n@PIPELINES.register_module()\nclass LoadMultiViewImageFromFiles(object):\n    \"\"\"Load multi channel images from a list of separate channel files.\n\n    Expects results['img_filename'] to be a list of filenames.\n\n    Args:\n        
to_float32 (bool, optional): Whether to convert the img to float32.\n            Defaults to False.\n        color_type (str, optional): Color type of the file.\n            Defaults to 'unchanged'.\n    \"\"\"\n\n    def __init__(self, to_float32=False, color_type='unchanged'):\n        self.to_float32 = to_float32\n        self.color_type = color_type\n\n    def __call__(self, results):\n        \"\"\"Call function to load multi-view image from files.\n\n        Args:\n            results (dict): Result dict containing multi-view image filenames.\n\n        Returns:\n            dict: The result dict containing the multi-view image data.\n                Added keys and values are described below.\n\n                - filename (str): Multi-view image filenames.\n                - img (np.ndarray): Multi-view image arrays.\n                - img_shape (tuple[int]): Shape of multi-view image arrays.\n                - ori_shape (tuple[int]): Shape of original image arrays.\n                - pad_shape (tuple[int]): Shape of padded image arrays.\n                - scale_factor (float): Scale factor.\n                - img_norm_cfg (dict): Normalization configuration of images.\n        \"\"\"\n        filename = results['img_filename']\n        # img is of shape (h, w, c, num_views)\n        img = np.stack(\n            [mmcv.imread(name, self.color_type) for name in filename], axis=-1)\n        if self.to_float32:\n            img = img.astype(np.float32)\n        results['filename'] = filename\n        # unravel to list, see `DefaultFormatBundle` in formatting.py\n        # which will transpose each image separately and then stack into array\n        results['img'] = [img[..., i] for i in range(img.shape[-1])]\n        results['img_shape'] = img.shape\n        results['ori_shape'] = img.shape\n        # Set initial values for default meta_keys\n        results['pad_shape'] = img.shape\n        results['scale_factor'] = 1.0\n        num_channels = 1 if len(img.shape) < 3 else img.shape[2]\n        results['img_norm_cfg'] = dict(\n            mean=np.zeros(num_channels, dtype=np.float32),\n            std=np.ones(num_channels, dtype=np.float32),\n            to_rgb=False)\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(to_float32={self.to_float32}, '\n        repr_str += f\"color_type='{self.color_type}')\"\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass LoadImageFromFileMono3D(object):\n    \"\"\"Load an image from file in monocular 3D object detection. Compared to 2D\n    detection, additional camera parameters need to be loaded.\n\n    Args:\n        kwargs (dict): Arguments are the same as those in\n            :class:`LoadImageFromFile`.\n    \"\"\"\n\n\n    def __call__(self, results):\n        \"\"\"Call functions to load image and get image meta information.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet.CustomDataset`.\n\n        Returns:\n            dict: The dict contains loaded image and meta information.\n        \"\"\"\n        super().__call__(results)\n        results['cam2img'] = results['img_info']['cam_intrinsic']\n        return results\n\n@PIPELINES.register_module()\nclass LoadOccupancy(object):\n    \"\"\"Load an image from file in monocular 3D object detection. 
Compared to 2D\n    detection, additional camera parameters need to be loaded.\n\n    Args:\n        kwargs (dict): Arguments are the same as those in\n            :class:`LoadImageFromFile`.\n    \"\"\"\n\n    def __init__(self, occupancy_path='/mount/dnn_data/occupancy_2023/gts',\n                    num_classes=17,\n                    ignore_nonvisible=False,\n                    mask='mask_camera',\n                    ignore_classes=[],\n                    fix_void=True) :\n        self.occupancy_path = occupancy_path\n        self.num_classes = num_classes\n        self.ignore_nonvisible = ignore_nonvisible\n        self.mask = mask\n\n        self.ignore_classes=ignore_classes\n\n        self.fix_void = fix_void\n\n\n    def __call__(self, results):\n        \"\"\"Call functions to load image and get image meta information.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet.CustomDataset`.\n\n        Returns:\n            dict: The dict contains loaded image and meta information.\n        \"\"\"\n\n        scene_name = results['curr']['scene_name']\n        sample_token = results['curr']['token']\n\n\n        occupancy_file_path = osp.join(self.occupancy_path, scene_name, sample_token, 'labels.npz')\n        data = np.load(occupancy_file_path)\n        occupancy = torch.tensor(data['semantics'])\n        visible_mask = torch.tensor(data[self.mask])\n        # visible_mask_lidar = data['mask_lidar']\n\n        if self.ignore_nonvisible:\n            occupancy[~visible_mask.to(torch.bool)] = 255\n\n\n        # to BEVDet format\n        occupancy = occupancy.permute(2, 0, 1)\n        occupancy = torch.rot90(occupancy, 1, [1, 2])\n        occupancy = torch.flip(occupancy, [1])\n        occupancy = occupancy.permute(1, 2, 0)\n\n\n        if self.fix_void:\n            occupancy[occupancy<255] = occupancy[occupancy<255] + 1\n\n        for class_ in self.ignore_classes:\n            occupancy[occupancy==class_] = 255\n\n        if results['rotate_bda'] != 0:\n            occupancy = occupancy.permute(2, 0, 1)\n            occupancy = rotate(occupancy, -results['rotate_bda'], fill=255).permute(1, 2, 0)\n\n        if results['flip_dx']:\n            occupancy = torch.flip(occupancy, [1])\n\n        if results['flip_dy']:\n            occupancy = torch.flip(occupancy, [0])\n\n\n\n        results['gt_occupancy'] = occupancy\n        results['visible_mask'] = visible_mask\n        \n        results['visible_mask_bev'] = (occupancy==255).sum(-1)\n\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadPointsFromMultiSweeps(object):\n    \"\"\"Load points from multiple sweeps.\n\n    This is usually used for nuScenes dataset to utilize previous sweeps.\n\n    Args:\n        sweeps_num (int, optional): Number of sweeps. Defaults to 10.\n        load_dim (int, optional): Dimension number of the loaded points.\n            Defaults to 5.\n        use_dim (list[int], optional): Which dimension to use.\n            Defaults to [0, 1, 2, 4].\n        time_dim (int, optional): Which dimension to represent the timestamps\n            of each points. Defaults to 4.\n        file_client_args (dict, optional): Config dict of file clients,\n            refer to\n            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py\n            for more details. Defaults to dict(backend='disk').\n        pad_empty_sweeps (bool, optional): Whether to repeat keyframe when\n            sweeps is empty. 
Defaults to False.\n        remove_close (bool, optional): Whether to remove close points.\n            Defaults to False.\n        test_mode (bool, optional): If `test_mode=True`, it will not\n            randomly sample sweeps but select the nearest N frames.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 sweeps_num=10,\n                 load_dim=5,\n                 use_dim=[0, 1, 2, 4],\n                 time_dim=4,\n                 file_client_args=dict(backend='disk'),\n                 pad_empty_sweeps=False,\n                 remove_close=False,\n                 translate2ego=False,\n                 test_mode=False):\n        self.load_dim = load_dim\n        self.sweeps_num = sweeps_num\n        self.use_dim = use_dim\n        self.time_dim = time_dim\n        assert time_dim < load_dim, \\\n            f'Expect the timestamp dimension < {load_dim}, got {time_dim}'\n        self.file_client_args = file_client_args.copy()\n        self.file_client = None\n        self.pad_empty_sweeps = pad_empty_sweeps\n        self.remove_close = remove_close\n        self.test_mode = test_mode\n        assert max(use_dim) < load_dim, \\\n            f'Expect all used dimensions < {load_dim}, got {use_dim}'\n        self.translate2ego = translate2ego\n        \n    def _load_points(self, pts_filename):\n        \"\"\"Private function to load point clouds data.\n\n        Args:\n            pts_filename (str): Filename of point clouds data.\n\n        Returns:\n            np.ndarray: An array containing point clouds data.\n        \"\"\"\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            pts_bytes = self.file_client.get(pts_filename)\n            points = np.frombuffer(pts_bytes, dtype=np.float32)\n        except ConnectionError:\n            mmcv.check_file_exist(pts_filename)\n            if pts_filename.endswith('.npy'):\n                points = np.load(pts_filename)\n            else:\n                points = np.fromfile(pts_filename, dtype=np.float32)\n        return points\n\n    def _remove_close(self, points, radius=1.0):\n        \"\"\"Removes point too close within a certain radius from origin.\n\n        Args:\n            points (np.ndarray | :obj:`BasePoints`): Sweep points.\n            radius (float, optional): Radius below which points are removed.\n                Defaults to 1.0.\n\n        Returns:\n            np.ndarray: Points after removing.\n        \"\"\"\n        if isinstance(points, np.ndarray):\n            points_numpy = points\n        elif isinstance(points, BasePoints):\n            points_numpy = points.tensor.numpy()\n        else:\n            raise NotImplementedError\n        x_filt = np.abs(points_numpy[:, 0]) < radius\n        y_filt = np.abs(points_numpy[:, 1]) < radius\n        not_close = np.logical_not(np.logical_and(x_filt, y_filt))\n        return points[not_close]\n\n    def __call__(self, results):\n        \"\"\"Call function to load multi-sweep point clouds from files.\n\n        Args:\n            results (dict): Result dict containing multi-sweep point cloud\n                filenames.\n\n        Returns:\n            dict: The result dict containing the multi-sweep points data.\n                Added key and value are described below.\n\n                - points (np.ndarray | :obj:`BasePoints`): Multi-sweep point\n                    cloud arrays.\n        \"\"\"\n        points = results['points']\n        
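        # Key-frame points get a relative timestamp of 0 (next line); each loaded
        # sweep is rotated/translated into the key LiDAR frame and stamped with its
        # time lag (ts - sweep_ts) in `time_dim`, so the lag can be used as an extra
        # per-point feature. A hypothetical pipeline entry using this module might be:
        #   dict(type='LoadPointsFromMultiSweeps', sweeps_num=10, load_dim=5,
        #        use_dim=[0, 1, 2, 4], pad_empty_sweeps=True, remove_close=True)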
points.tensor[:, self.time_dim] = 0\n        sweep_points_list = [points]\n        ts = results['timestamp']\n\n        if self.pad_empty_sweeps and len(results['sweeps']) == 0:\n            for i in range(self.sweeps_num):\n                if self.remove_close:\n                    sweep_points_list.append(self._remove_close(points))\n                else:\n                    sweep_points_list.append(points)\n        else:\n            if len(results['sweeps']) <= self.sweeps_num:\n                choices = np.arange(len(results['sweeps']))\n            elif self.test_mode:\n                choices = np.arange(self.sweeps_num)\n            else:\n                choices = np.random.choice(\n                    len(results['sweeps']), self.sweeps_num, replace=False)\n            for idx in choices:\n                sweep = results['sweeps'][idx]\n                points_sweep = self._load_points(sweep['data_path'])\n                points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim)\n                if self.remove_close:\n                    points_sweep = self._remove_close(points_sweep)\n                sweep_ts = sweep['timestamp'] / 1e6\n                points_sweep[:, :3] = points_sweep[:, :3] @ sweep[\n                    'sensor2lidar_rotation'].T\n                points_sweep[:, :3] += sweep['sensor2lidar_translation']\n                points_sweep[:, self.time_dim] = ts - sweep_ts\n                points_sweep = points.new_point(points_sweep)\n                sweep_points_list.append(points_sweep)\n\n        points = points.cat(sweep_points_list)\n        points = points[:, self.use_dim]\n        results['points'] = points\n        if self.translate2ego:\n            lidar2lidarego = np.eye(4, dtype=np.float32)\n            lidar2lidarego[:3, :3] = Quaternion(\n            results['curr']['lidar2ego_rotation']).rotation_matrix\n            lidar2lidarego[:3, 3] = results['curr']['lidar2ego_translation']\n            lidar2lidarego = to_tensor(lidar2lidarego)\n            results['points'].tensor[:, :3]  = results['points'].tensor[:, :3].matmul(lidar2lidarego[:3, :3].T) + lidar2lidarego[:3, 3]\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'\n\n\n@PIPELINES.register_module()\nclass PointsFromLidartoEgo(object):\n    \n    def __init__(self, translate2ego=True, ego_cam='CAM_FRONT'):\n        self.ego_cam=ego_cam\n        self.translate2ego = translate2ego\n\n    def __call__(self, results):\n        if self.translate2ego:\n            # lidar2lidarego = np.eye(4, dtype=np.float32)\n            # lidar2lidarego[:3, :3] = Quaternion(\n            # results['curr']['lidar2ego_rotation']).rotation_matrix\n            # lidar2lidarego[:3, 3] = results['curr']['lidar2ego_translation']\n            # lidar2lidarego = to_tensor(lidar2lidarego)\n            # results['points'].tensor[:, :3]  = results['points'].tensor[:, :3].matmul(lidar2lidarego[:3, :3].T) + lidar2lidarego[:3, 3]\n\n            lidar2lidarego = np.eye(4, dtype=np.float32)\n            lidar2lidarego[:3, :3] = Quaternion(\n                results['curr']['lidar2ego_rotation']).rotation_matrix\n            lidar2lidarego[:3, 3] = results['curr']['lidar2ego_translation']\n\n            lidarego2global = np.eye(4, dtype=np.float32)\n            lidarego2global[:3, :3] = Quaternion(\n                results['curr']['ego2global_rotation']).rotation_matrix\n            
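            # translation part of the lidar-ego -> global transform; the rotation was
            # set just above, completing the homogeneous 4x4 matrix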
lidarego2global[:3, 3] = results['curr']['ego2global_translation']\n\n            camego2global = np.eye(4, dtype=np.float32)\n            camego2global[:3, :3] = Quaternion(\n                results['curr']['cams'][self.ego_cam]\n                ['ego2global_rotation']).rotation_matrix\n            camego2global[:3, 3] = results['curr']['cams'][self.ego_cam][\n                'ego2global_translation']\n            lidar2camego = np.linalg.inv(camego2global) @ lidarego2global @ lidar2lidarego\n            lidar2camego = to_tensor(lidar2camego)\n            results['points'].tensor[:, :3]  = results['points'].tensor[:, :3].matmul(lidar2camego[:3, :3].T) + lidar2camego[:3, 3]\n\n        return results\n\n\n@PIPELINES.register_module()\nclass PointSegClassMapping(object):\n    \"\"\"Map original semantic class to valid category ids.\n\n    Map valid classes as 0~len(valid_cat_ids)-1 and\n    others as len(valid_cat_ids).\n\n    Args:\n        valid_cat_ids (tuple[int]): A tuple of valid category.\n        max_cat_id (int, optional): The max possible cat_id in input\n            segmentation mask. Defaults to 40.\n    \"\"\"\n\n    def __init__(self, valid_cat_ids, max_cat_id=40):\n        assert max_cat_id >= np.max(valid_cat_ids), \\\n            'max_cat_id should be greater than maximum id in valid_cat_ids'\n\n        self.valid_cat_ids = valid_cat_ids\n        self.max_cat_id = int(max_cat_id)\n\n        # build cat_id to class index mapping\n        neg_cls = len(valid_cat_ids)\n        self.cat_id2class = np.ones(\n            self.max_cat_id + 1, dtype=np.int) * neg_cls\n        for cls_idx, cat_id in enumerate(valid_cat_ids):\n            self.cat_id2class[cat_id] = cls_idx\n\n    def __call__(self, results):\n        \"\"\"Call function to map original semantic class to valid category ids.\n\n        Args:\n            results (dict): Result dict containing point semantic masks.\n\n        Returns:\n            dict: The result dict containing the mapped category ids.\n                Updated key and value are described below.\n\n                - pts_semantic_mask (np.ndarray): Mapped semantic masks.\n        \"\"\"\n        assert 'pts_semantic_mask' in results\n        pts_semantic_mask = results['pts_semantic_mask']\n\n        converted_pts_sem_mask = self.cat_id2class[pts_semantic_mask]\n\n        results['pts_semantic_mask'] = converted_pts_sem_mask\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(valid_cat_ids={self.valid_cat_ids}, '\n        repr_str += f'max_cat_id={self.max_cat_id})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass NormalizePointsColor(object):\n    \"\"\"Normalize color of points.\n\n    Args:\n        color_mean (list[float]): Mean color of the point cloud.\n    \"\"\"\n\n    def __init__(self, color_mean):\n        self.color_mean = color_mean\n\n    def __call__(self, results):\n        \"\"\"Call function to normalize color of points.\n\n        Args:\n            results (dict): Result dict containing point clouds data.\n\n        Returns:\n            dict: The result dict containing the normalized points.\n                Updated key and value are described below.\n\n                - points (:obj:`BasePoints`): Points after color normalization.\n        \"\"\"\n        points = results['points']\n        assert points.attribute_dims is not None and \\\n            'color' in 
points.attribute_dims.keys(), \\\n            'Expect points have color attribute'\n        if self.color_mean is not None:\n            points.color = points.color - \\\n                points.color.new_tensor(self.color_mean)\n        points.color = points.color / 255.0\n        results['points'] = points\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(color_mean={self.color_mean})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass LoadPointsFromFile(object):\n    \"\"\"Load Points From File.\n\n    Load points from file.\n\n    Args:\n        coord_type (str): The type of coordinates of points cloud.\n            Available options includes:\n            - 'LIDAR': Points in LiDAR coordinates.\n            - 'DEPTH': Points in depth coordinates, usually for indoor dataset.\n            - 'CAMERA': Points in camera coordinates.\n        load_dim (int, optional): The dimension of the loaded points.\n            Defaults to 6.\n        use_dim (list[int], optional): Which dimensions of the points to use.\n            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4\n            or use_dim=[0, 1, 2, 3] to use the intensity dimension.\n        shift_height (bool, optional): Whether to use shifted height.\n            Defaults to False.\n        use_color (bool, optional): Whether to use color features.\n            Defaults to False.\n        file_client_args (dict, optional): Config dict of file clients,\n            refer to\n            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py\n            for more details. Defaults to dict(backend='disk').\n    \"\"\"\n\n    def __init__(self,\n                 coord_type,\n                 load_dim=6,\n                 use_dim=[0, 1, 2],\n                 shift_height=False,\n                 use_color=False,\n                 dtype='float32',\n                 file_client_args=dict(backend='disk'),\n                 translate2ego=True,\n                 ):\n        self.shift_height = shift_height\n        self.use_color = use_color\n        if isinstance(use_dim, int):\n            use_dim = list(range(use_dim))\n        assert max(use_dim) < load_dim, \\\n            f'Expect all used dimensions < {load_dim}, got {use_dim}'\n        assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH']\n\n        self.coord_type = coord_type\n        self.load_dim = load_dim\n        self.use_dim = use_dim\n        self.file_client_args = file_client_args.copy()\n        self.file_client = None\n        if dtype=='float32':\n            self.dtype = np.float32\n        elif dtype== 'float16':\n            self.dtype = np.float16\n        else:\n            assert False\n        self.translate2ego = translate2ego\n\n    def _load_points(self, pts_filename):\n        \"\"\"Private function to load point clouds data.\n\n        Args:\n            pts_filename (str): Filename of point clouds data.\n\n        Returns:\n            np.ndarray: An array containing point clouds data.\n        \"\"\"\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            pts_bytes = self.file_client.get(pts_filename)\n            points = np.frombuffer(pts_bytes, dtype=self.dtype)\n        except ConnectionError:\n            mmcv.check_file_exist(pts_filename)\n            if pts_filename.endswith('.npy'):\n                
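                # Fallback when the configured file client cannot serve the bytes:
                # read from local disk, via np.load for .npy files and a raw
                # np.fromfile with the configured dtype otherwise (nuScenes .pcd.bin
                # sweeps store float32 records, presumably why dtype defaults to
                # 'float32').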
points = np.load(pts_filename)\n            else:\n                points = np.fromfile(pts_filename, dtype=self.dtype)\n\n        return points\n\n\n    def __call__(self, results):\n        \"\"\"Call function to load points data from file.\n\n        Args:\n            results (dict): Result dict containing point clouds data.\n\n        Returns:\n            dict: The result dict containing the point clouds data.\n                Added key and value are described below.\n\n                - points (:obj:`BasePoints`): Point clouds data.\n        \"\"\"\n        pts_filename = results['pts_filename']\n        points = self._load_points(pts_filename)\n        points = points.reshape(-1, self.load_dim)\n        points = points[:, self.use_dim]\n\n\n\n        attribute_dims = None\n\n        if self.shift_height:\n            floor_height = np.percentile(points[:, 2], 0.99)\n            height = points[:, 2] - floor_height\n            points = np.concatenate(\n                [points[:, :3],\n                 np.expand_dims(height, 1), points[:, 3:]], 1)\n            attribute_dims = dict(height=3)\n\n        if self.use_color:\n            assert len(self.use_dim) >= 6\n            if attribute_dims is None:\n                attribute_dims = dict()\n            attribute_dims.update(\n                dict(color=[\n                    points.shape[1] - 3,\n                    points.shape[1] - 2,\n                    points.shape[1] - 1,\n                ]))\n\n        points_class = get_points_type(self.coord_type)\n        points = points_class(\n            points, points_dim=points.shape[-1], attribute_dims=attribute_dims)\n\n        results['points'] = points\n        if self.translate2ego:\n            lidar2lidarego = np.eye(4, dtype=np.float32)\n            lidar2lidarego[:3, :3] = Quaternion(\n            results['curr']['lidar2ego_rotation']).rotation_matrix\n            lidar2lidarego[:3, 3] = results['curr']['lidar2ego_translation']\n            lidar2lidarego = to_tensor(lidar2lidarego)\n            results['points'].tensor[:, :3]  = results['points'].tensor[:, :3].matmul(lidar2lidarego[:3, :3].T) + lidar2lidarego[:3, 3]\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__ + '('\n        repr_str += f'shift_height={self.shift_height}, '\n        repr_str += f'use_color={self.use_color}, '\n        repr_str += f'file_client_args={self.file_client_args}, '\n        repr_str += f'load_dim={self.load_dim}, '\n        repr_str += f'use_dim={self.use_dim})'\n        return repr_str\n\n\n\n\n@PIPELINES.register_module()\nclass LoadPointsFromDict(LoadPointsFromFile):\n    \"\"\"Load Points From Dict.\"\"\"\n\n    def __call__(self, results):\n        assert 'points' in results\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadAnnotations3D(LoadAnnotations):\n    \"\"\"Load Annotations3D.\n\n    Load instance mask and semantic mask of points and\n    encapsulate the items into related fields.\n\n    Args:\n        with_bbox_3d (bool, optional): Whether to load 3D boxes.\n            Defaults to True.\n        with_label_3d (bool, optional): Whether to load 3D labels.\n            Defaults to True.\n        with_attr_label (bool, optional): Whether to load attribute label.\n            Defaults to False.\n        with_mask_3d (bool, optional): Whether to load 3D instance masks.\n            for points. 
Defaults to False.\n        with_seg_3d (bool, optional): Whether to load 3D semantic masks.\n            for points. Defaults to False.\n        with_bbox (bool, optional): Whether to load 2D boxes.\n            Defaults to False.\n        with_label (bool, optional): Whether to load 2D labels.\n            Defaults to False.\n        with_mask (bool, optional): Whether to load 2D instance masks.\n            Defaults to False.\n        with_seg (bool, optional): Whether to load 2D semantic masks.\n            Defaults to False.\n        with_bbox_depth (bool, optional): Whether to load 2.5D boxes.\n            Defaults to False.\n        poly2mask (bool, optional): Whether to convert polygon annotations\n            to bitmasks. Defaults to True.\n        seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks.\n            Defaults to int64\n        file_client_args (dict): Config dict of file clients, refer to\n            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py\n            for more details.\n    \"\"\"\n\n    def __init__(self,\n                 with_bbox_3d=True,\n                 with_label_3d=True,\n                 with_attr_label=False,\n                 with_mask_3d=False,\n                 with_seg_3d=False,\n                 with_bbox=False,\n                 with_label=False,\n                 with_mask=False,\n                 with_seg=False,\n                 with_bbox_depth=False,\n                 poly2mask=True,\n                 seg_3d_dtype=np.int64,\n                 file_client_args=dict(backend='disk')):\n        super().__init__(\n            with_bbox,\n            with_label,\n            with_mask,\n            with_seg,\n            poly2mask,\n            file_client_args=file_client_args)\n        self.with_bbox_3d = with_bbox_3d\n        self.with_bbox_depth = with_bbox_depth\n        self.with_label_3d = with_label_3d\n        self.with_attr_label = with_attr_label\n        self.with_mask_3d = with_mask_3d\n        self.with_seg_3d = with_seg_3d\n        self.seg_3d_dtype = seg_3d_dtype\n\n    def _load_bboxes_3d(self, results):\n        \"\"\"Private function to load 3D bounding box annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D bounding box annotations.\n        \"\"\"\n        results['gt_bboxes_3d'] = results['ann_infos'][0]\n        results['bbox3d_fields'].append('gt_bboxes_3d')\n        return results\n\n    def _load_bboxes_depth(self, results):\n        \"\"\"Private function to load 2.5D bounding box annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 2.5D bounding box annotations.\n        \"\"\"\n        results['centers2d'] = results['ann_info']['centers2d']\n        results['depths'] = results['ann_info']['depths']\n        return results\n\n    def _load_labels_3d(self, results):\n        \"\"\"Private function to load label annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded label annotations.\n        \"\"\"\n        results['gt_labels_3d'] = results['ann_infos'][1]\n        return results\n\n    def _load_attr_labels(self, results):\n        \"\"\"Private function to load label annotations.\n\n        Args:\n            results (dict): 
Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded label annotations.\n        \"\"\"\n        results['attr_labels'] = results['ann_infos']['attr_labels']\n        return results\n\n    def _load_masks_3d(self, results):\n        \"\"\"Private function to load 3D mask annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D mask annotations.\n        \"\"\"\n        pts_instance_mask_path = results['ann_infos']['pts_instance_mask_path']\n\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            mask_bytes = self.file_client.get(pts_instance_mask_path)\n            pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int64)\n        except ConnectionError:\n            mmcv.check_file_exist(pts_instance_mask_path)\n            pts_instance_mask = np.fromfile(\n                pts_instance_mask_path, dtype=np.int64)\n\n        results['pts_instance_mask'] = pts_instance_mask\n        results['pts_mask_fields'].append('pts_instance_mask')\n        return results\n\n    def _load_semantic_seg_3d(self, results):\n        \"\"\"Private function to load 3D semantic segmentation annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing the semantic segmentation annotations.\n        \"\"\"\n        pts_semantic_mask_path = results['ann_infos']['pts_semantic_mask_path']\n\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            mask_bytes = self.file_client.get(pts_semantic_mask_path)\n            # add .copy() to fix read-only bug\n            pts_semantic_mask = np.frombuffer(\n                mask_bytes, dtype=self.seg_3d_dtype).copy()\n        except ConnectionError:\n            mmcv.check_file_exist(pts_semantic_mask_path)\n            pts_semantic_mask = np.fromfile(\n                pts_semantic_mask_path, dtype=np.int64)\n\n        results['pts_semantic_mask'] = pts_semantic_mask\n        results['pts_seg_fields'].append('pts_semantic_mask')\n        return results\n\n    def __call__(self, results):\n        \"\"\"Call function to load multiple types annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D bounding box, label, mask and\n                semantic segmentation annotations.\n        \"\"\"\n        results = super().__call__(results)\n        if self.with_bbox_3d:\n            results = self._load_bboxes_3d(results)\n            if results is None:\n                return None\n        if self.with_bbox_depth:\n            results = self._load_bboxes_depth(results)\n            if results is None:\n                return None\n        if self.with_label_3d:\n            results = self._load_labels_3d(results)\n        if self.with_attr_label:\n            results = self._load_attr_labels(results)\n        if self.with_mask_3d:\n            results = self._load_masks_3d(results)\n        if self.with_seg_3d:\n            results = self._load_semantic_seg_3d(results)\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        indent_str = '    '\n   
     repr_str = self.__class__.__name__ + '(\\n'\n        repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d}, '\n        repr_str += f'{indent_str}with_label_3d={self.with_label_3d}, '\n        repr_str += f'{indent_str}with_attr_label={self.with_attr_label}, '\n        repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d}, '\n        repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d}, '\n        repr_str += f'{indent_str}with_bbox={self.with_bbox}, '\n        repr_str += f'{indent_str}with_label={self.with_label}, '\n        repr_str += f'{indent_str}with_mask={self.with_mask}, '\n        repr_str += f'{indent_str}with_seg={self.with_seg}, '\n        repr_str += f'{indent_str}with_bbox_depth={self.with_bbox_depth}, '\n        repr_str += f'{indent_str}poly2mask={self.poly2mask})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass PointToMultiViewDepth(object):\n\n    def __init__(self, grid_config, downsample=1):\n        self.downsample = downsample\n        self.grid_config = grid_config\n\n    def points2depthmap(self, points, height, width):\n        height, width = height // self.downsample, width // self.downsample\n        depth_map = torch.zeros((height, width), dtype=torch.float32)\n        coor = torch.round(points[:, :2] / self.downsample)\n        depth = points[:, 2]\n        kept1 = (coor[:, 0] >= 0) & (coor[:, 0] < width) & (\n            coor[:, 1] >= 0) & (coor[:, 1] < height) & (\n                depth < self.grid_config['depth'][1]) & (\n                    depth >= self.grid_config['depth'][0])\n        coor, depth = coor[kept1], depth[kept1]\n\n        ranks = coor[:, 0] + coor[:, 1] * width\n        sort = (ranks + depth / 100.).argsort()\n        coor, depth, ranks = coor[sort], depth[sort], ranks[sort]\n\n\n        kept2 = torch.ones(coor.shape[0], device=coor.device, dtype=torch.bool)\n        kept2[1:] = (ranks[1:] != ranks[:-1])\n        coor, depth = coor[kept2], depth[kept2]\n\n\n        coor = coor.to(torch.long)\n        depth_map[coor[:, 1], coor[:, 0]] = depth\n     \n        return depth_map\n\n    def __call__(self, results):\n        points_lidar = results['points']\n        imgs, rots, trans, intrins = results['img_inputs'][:4]\n        post_rots, post_trans, bda = results['img_inputs'][4:]\n        depth_map_list = []\n        for cid in range(len(results['cam_names'])):\n            cam_name = results['cam_names'][cid]\n            # lidar2lidarego = np.eye(4, dtype=np.float32)\n            # lidar2lidarego[:3, :3] = Quaternion(\n            #     results['curr']['lidar2ego_rotation']).rotation_matrix\n            # lidar2lidarego[:3, 3] = results['curr']['lidar2ego_translation']\n            # lidar2lidarego = to_tensor(lidar2lidarego)\n\n            lidarego2global = np.eye(4, dtype=np.float32)\n            lidarego2global[:3, :3] = Quaternion(\n                results['curr']['ego2global_rotation']).rotation_matrix\n            lidarego2global[:3, 3] = results['curr']['ego2global_translation']\n            lidarego2global = to_tensor(lidarego2global)\n\n            cam2camego = np.eye(4, dtype=np.float32)\n            cam2camego[:3, :3] = Quaternion(\n                results['curr']['cams'][cam_name]\n                ['sensor2ego_rotation']).rotation_matrix\n            cam2camego[:3, 3] = results['curr']['cams'][cam_name][\n                'sensor2ego_translation']\n            cam2camego = to_tensor(cam2camego)\n\n            camego2global = np.eye(4, dtype=np.float32)\n            camego2global[:3, :3] = 
Quaternion(\n                results['curr']['cams'][cam_name]\n                ['ego2global_rotation']).rotation_matrix\n            camego2global[:3, 3] = results['curr']['cams'][cam_name][\n                'ego2global_translation']\n            camego2global = to_tensor(camego2global)\n\n            cam2img = np.eye(4, dtype=np.float32)\n            cam2img = to_tensor(cam2img)\n            cam2img[:3, :3] = intrins[cid]\n\n            lidar2cam = torch.inverse(camego2global.matmul(cam2camego)).matmul(lidarego2global)\n            # lidarego2global.matmul(lidar2lidarego))\n            lidar2img = cam2img.matmul(lidar2cam)\n            points_img = points_lidar.tensor[:, :3].matmul(\n                lidar2img[:3, :3].T) + lidar2img[:3, 3].unsqueeze(0)\n            points_img = torch.cat(\n                [points_img[:, :2] / points_img[:, 2:3], points_img[:, 2:3]],\n                1)\n            points_img = points_img.matmul(\n                post_rots[cid].T) + post_trans[cid:cid + 1, :]\n            depth_map = self.points2depthmap(points_img, imgs.shape[2],\n                                             imgs.shape[3])  \n            depth_map_list.append(depth_map)\n          \n        depth_map = torch.stack(depth_map_list)\n\n        results['gt_depth'] = depth_map\n\n        return results\n\n\ndef mmlabNormalize(img, mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True, debug=False):\n    from mmcv.image.photometric import imnormalize\n    mean = np.array(mean, dtype=np.float32)\n    std = np.array(std, dtype=np.float32)\n    to_rgb = to_rgb\n    if debug:\n        print('warning, debug in mmlabNormalize')\n        img = np.asarray(img) # not normalize for visualization\n    else:\n        img = imnormalize(np.array(img), mean, std, to_rgb)\n    img = torch.tensor(img).float().permute(2, 0, 1).contiguous()\n    return img\n\n\n@PIPELINES.register_module()\nclass PrepareImageInputs(object):\n    \"\"\"Load multi channel images from a list of separate channel files.\n\n    Expects results['img_filename'] to be a list of filenames.\n\n    Args:\n        to_float32 (bool): Whether to convert the img to float32.\n            Defaults to False.\n        color_type (str): Color type of the file. 
Defaults to 'unchanged'.\n    \"\"\"\n\n    def __init__(\n        self,\n        data_config,\n        is_train=False,\n        sequential=False,\n        ego_cam='CAM_FRONT',\n        img_corruptions=None,\n        normalize_cfg=dict(\n             mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True, debug=False\n        )\n    ):\n        self.is_train = is_train\n        self.data_config = data_config\n        self.normalize_img = mmlabNormalize\n        self.sequential = sequential\n        self.ego_cam = ego_cam\n        self.normalize_cfg = normalize_cfg\n        self.img_corruptions = img_corruptions\n\n    def get_rot(self, h):\n        return torch.Tensor(\n            np.array([\n            [np.cos(h), np.sin(h)],\n            [-np.sin(h), np.cos(h)],\n            ]))\n\n    def img_transform(self, img, post_rot, post_tran, resize, resize_dims,\n                      crop, flip, rotate):\n        # adjust image\n        img = self.img_transform_core(img, resize_dims, crop, flip, rotate)\n\n        # post-homography transformation\n        post_rot *= resize\n        post_tran -= torch.Tensor(crop[:2])\n        if flip:\n            A = torch.Tensor([[-1, 0], [0, 1]])\n            b = torch.Tensor([crop[2] - crop[0], 0])\n            post_rot = A.matmul(post_rot)\n            post_tran = A.matmul(post_tran) + b\n        A = self.get_rot(rotate / 180 * np.pi)\n        b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2\n        b = A.matmul(-b) + b\n        post_rot = A.matmul(post_rot)\n        post_tran = A.matmul(post_tran) + b\n\n        return img, post_rot, post_tran\n\n    def img_transform_core(self, img, resize_dims, crop, flip, rotate):\n        # adjust image\n        img = img.resize(resize_dims)\n        img = img.crop(crop)\n        if flip:\n            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)\n        img = img.rotate(rotate)\n        return img\n\n    def choose_cams(self):\n        if self.is_train and self.data_config['Ncams'] < len(\n                self.data_config['cams']):\n            cam_names = np.random.choice(\n                self.data_config['cams'],\n                self.data_config['Ncams'],\n                replace=False)\n        else:\n            cam_names = self.data_config['cams']\n        return cam_names\n\n    def sample_augmentation(self, H, W, flip=None, scale=None):\n        fH, fW = self.data_config['input_size']\n        H, W = self.data_config['src_size']\n        if self.is_train:\n            # resize = float(fW) / float(W)\n            # resize += np.random.uniform(*self.data_config['resize'])\n            resize = np.random.uniform(*self.data_config[\"resize\"])\n            resize_dims = (int(W * resize), int(H * resize))\n            newW, newH = resize_dims\n            crop_h = int((1 - np.random.uniform(*self.data_config['crop_h'])) *\n                         newH) - fH\n            crop_w = int(np.random.uniform(0, max(0, newW - fW)))\n            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)\n            flip = self.data_config['flip'] and np.random.choice([0, 1])\n            rotate = np.random.uniform(*self.data_config['rot'])\n        else:\n            # resize = float(fW) / float(W)\n            resize = max(fH / H, fW / W)\n            # resize += self.data_config.get('resize_test', 0.0)\n            if scale is not None:\n                resize = scale\n            resize_dims = (int(W * resize), int(H * resize))\n            newW, newH = resize_dims\n            crop_h = 
int((1 - np.mean(self.data_config['crop_h'])) * newH) - fH\n            crop_w = int(max(0, newW - fW) / 2)\n            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)\n            flip = False if flip is None else flip\n            rotate = 0\n        return resize, resize_dims, crop, flip, rotate\n\n    def get_sensor2ego_transformation(self,\n                                      cam_info,\n                                      key_info,\n                                      cam_name,\n                                      ego_cam=None):\n        if ego_cam is None:\n            ego_cam = cam_name\n        w, x, y, z = cam_info['cams'][cam_name]['sensor2ego_rotation']\n        # sweep sensor to sweep ego\n        sweepsensor2sweepego_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        sweepsensor2sweepego_tran = torch.Tensor(\n            cam_info['cams'][cam_name]['sensor2ego_translation'])\n        sweepsensor2sweepego = sweepsensor2sweepego_rot.new_zeros((4, 4))\n        sweepsensor2sweepego[3, 3] = 1\n        sweepsensor2sweepego[:3, :3] = sweepsensor2sweepego_rot\n        sweepsensor2sweepego[:3, -1] = sweepsensor2sweepego_tran\n        # sweep ego to global\n        w, x, y, z = cam_info['cams'][cam_name]['ego2global_rotation']\n        sweepego2global_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        sweepego2global_tran = torch.Tensor(\n            cam_info['cams'][cam_name]['ego2global_translation'])\n        sweepego2global = sweepego2global_rot.new_zeros((4, 4))\n        sweepego2global[3, 3] = 1\n        sweepego2global[:3, :3] = sweepego2global_rot\n        sweepego2global[:3, -1] = sweepego2global_tran\n\n        # global sensor to cur ego\n        w, x, y, z = key_info['cams'][ego_cam]['ego2global_rotation']\n        keyego2global_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        keyego2global_tran = torch.Tensor(\n            key_info['cams'][ego_cam]['ego2global_translation'])\n        keyego2global = keyego2global_rot.new_zeros((4, 4))\n        keyego2global[3, 3] = 1\n        keyego2global[:3, :3] = keyego2global_rot\n        keyego2global[:3, -1] = keyego2global_tran\n        global2keyego = keyego2global.inverse()\n\n        sweepsensor2keyego = \\\n            global2keyego @ sweepego2global @ sweepsensor2sweepego\n\n        # global sensor to cur ego\n        w, x, y, z = key_info['cams'][cam_name]['ego2global_rotation']\n        keyego2global_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        keyego2global_tran = torch.Tensor(\n            key_info['cams'][cam_name]['ego2global_translation'])\n        keyego2global = keyego2global_rot.new_zeros((4, 4))\n        keyego2global[3, 3] = 1\n        keyego2global[:3, :3] = keyego2global_rot\n        keyego2global[:3, -1] = keyego2global_tran\n        global2keyego = keyego2global.inverse()\n\n        # cur ego to sensor\n        w, x, y, z = key_info['cams'][cam_name]['sensor2ego_rotation']\n        keysensor2keyego_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        keysensor2keyego_tran = torch.Tensor(\n            key_info['cams'][cam_name]['sensor2ego_translation'])\n        keysensor2keyego = keysensor2keyego_rot.new_zeros((4, 4))\n        keysensor2keyego[3, 3] = 1\n        keysensor2keyego[:3, :3] = keysensor2keyego_rot\n        keysensor2keyego[:3, -1] = keysensor2keyego_tran\n        keyego2keysensor = keysensor2keyego.inverse()\n        
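        # The chained product below maps sweep sensor -> sweep ego -> global ->
        # key ego -> key sensor (i.e. sweepsensor2keysensor); its inverse is the
        # key-sensor -> sweep-sensor transform returned alongside sweepsensor2keyego.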
keysensor2sweepsensor = (\n            keyego2keysensor @ global2keyego @ sweepego2global\n            @ sweepsensor2sweepego).inverse()\n        return sweepsensor2keyego, keysensor2sweepsensor\n\n\n    def get_sensor_transforms(self, cam_info, cam_name):\n        w, x, y, z = cam_info['cams'][cam_name]['sensor2ego_rotation']\n        # sweep sensor to sweep ego\n        sensor2ego_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        sensor2ego_tran = torch.Tensor(\n            cam_info['cams'][cam_name]['sensor2ego_translation'])\n        sensor2ego = sensor2ego_rot.new_zeros((4, 4))\n        sensor2ego[3, 3] = 1\n        sensor2ego[:3, :3] = sensor2ego_rot\n        sensor2ego[:3, -1] = sensor2ego_tran\n        # sweep ego to global\n        w, x, y, z = cam_info['cams'][cam_name]['ego2global_rotation']\n        ego2global_rot = torch.Tensor(\n            Quaternion(w, x, y, z).rotation_matrix)\n        ego2global_tran = torch.Tensor(\n            cam_info['cams'][cam_name]['ego2global_translation'])\n        ego2global = ego2global_rot.new_zeros((4, 4))\n        ego2global[3, 3] = 1\n        ego2global[:3, :3] = ego2global_rot\n        ego2global[:3, -1] = ego2global_tran\n        return sensor2ego, ego2global\n\n    def get_inputs(self, results, scale=None):\n        imgs = []\n        rots = []\n        trans = []\n        intrins = []\n        post_rots = []\n        post_trans = []\n        sensor2egos = []\n        ego2globals = []\n        cam_names = self.choose_cams()\n        results['cam_names'] = cam_names\n        results['input_size'] = self.data_config['input_size']\n        canvas = []\n        sensor2sensors = []\n        results['img_augs'] = {}\n        for cam_name in cam_names:\n            cam_data = results['curr']['cams'][cam_name]\n            filename = cam_data['data_path']\n            if self.img_corruptions in ['sun', 'noise', 'rain', 'snow', 'fog']:\n                filename = filename.split('/')\n                filename[2] = 'nuscenes_aug'\n                filename[3] = f'samples_{self.img_corruptions}'\n                filename = osp.join(*filename)\n            \n            img = Image.open(filename)\n            \n\n\n            post_rot = torch.eye(2)\n            post_tran = torch.zeros(2)\n\n            intrin = torch.Tensor(cam_data['cam_intrinsic'])\n\n            sensor2keyego, sensor2sensor = \\\n                self.get_sensor2ego_transformation(results['curr'],\n                                                   results['curr'],\n                                                   cam_name,\n                                                   self.ego_cam)\n            rot = sensor2keyego[:3, :3]\n            tran = sensor2keyego[:3, 3]\n            sensor2ego, ego2global = \\\n                self.get_sensor_transforms(results['curr'], cam_name)\n            # image view augmentation (resize, crop, horizontal flip, rotate)\n            if results.get('tta_config', None) is not None:\n                flip = results['tta_config']['tta_flip']\n            else: flip = None\n            img_augs = self.sample_augmentation(\n                H=img.height, W=img.width, flip=flip, scale=scale)\n            resize, resize_dims, crop, flip, rotate = img_augs\n            results['img_augs'][cam_name] = img_augs\n            img, post_rot2, post_tran2 = \\\n                self.img_transform(img, post_rot,\n                                   post_tran,\n                                   resize=resize,\n                      
             resize_dims=resize_dims,\n                                   crop=crop,\n                                   flip=flip,\n                                   rotate=rotate)\n\n            # for convenience, make augmentation matrices 3x3\n            post_tran = torch.zeros(3)\n            post_rot = torch.eye(3)\n            post_tran[:2] = post_tran2\n            post_rot[:2, :2] = post_rot2\n\n            canvas.append(np.array(img))\n            if self.img_corruptions == 'drop':\n                imgs.append(self.normalize_img(img, **self.normalize_cfg)* 0)\n            else:\n                imgs.append(self.normalize_img(img, **self.normalize_cfg))\n\n            if self.sequential:\n                assert 'adjacent' in results\n                for adj_info in results['adjacent']:\n                    filename_adj = adj_info['cams'][cam_name]['data_path']\n                    img_adjacent = Image.open(filename_adj)\n                    img_adjacent = self.img_transform_core(\n                        img_adjacent,\n                        resize_dims=resize_dims,\n                        crop=crop,\n                        flip=flip,\n                        rotate=rotate)\n                    imgs.append(self.normalize_img(img_adjacent, **self.normalize_cfg))\n            intrins.append(intrin)\n            rots.append(rot)\n            trans.append(tran)\n            post_rots.append(post_rot)\n            post_trans.append(post_tran)\n            sensor2sensors.append(sensor2sensor)\n            sensor2egos.append(sensor2ego)\n            ego2globals.append(ego2global)\n\n        if self.sequential:\n            for adj_info in results['adjacent']:\n                post_trans.extend(post_trans[:len(cam_names)])\n                post_rots.extend(post_rots[:len(cam_names)])\n                intrins.extend(intrins[:len(cam_names)])\n\n                # align\n                trans_adj = []\n                rots_adj = []\n                sensor2sensors_adj = []\n                for cam_name in cam_names:\n                    adjsensor2keyego, sensor2sensor = \\\n                        self.get_sensor2ego_transformation(adj_info,\n                                                           results['curr'],\n                                                           cam_name,\n                                                           self.ego_cam)\n                    rot = adjsensor2keyego[:3, :3]\n                    tran = adjsensor2keyego[:3, 3]\n                    rots_adj.append(rot)\n                    trans_adj.append(tran)\n                    sensor2sensors_adj.append(sensor2sensor)\n                for cam_name in cam_names:\n                    sensor2ego, ego2global = \\\n                        self.get_sensor_transforms(adj_info, cam_name)\n                    sensor2egos.append(sensor2ego)\n                    ego2globals.append(ego2global)\n\n                rots.extend(rots_adj)\n                trans.extend(trans_adj)\n                sensor2sensors.extend(sensor2sensors_adj)\n        imgs = torch.stack(imgs)\n        \n        sensor2egos = torch.stack(sensor2egos)\n        ego2globals = torch.stack(ego2globals)\n\n        rots = torch.stack(rots)\n        trans = torch.stack(trans)\n        intrins = torch.stack(intrins)\n        post_rots = torch.stack(post_rots)\n        post_trans = torch.stack(post_trans)\n        sensor2sensors = torch.stack(sensor2sensors)\n        results['canvas'] = canvas\n        results['sensor2sensors'] = sensor2sensors\n     
   return (imgs, rots, trans, intrins, post_rots, post_trans), (sensor2egos, ego2globals)\n\n    def __call__(self, results):\n        results['img_inputs'], results['aux_cam_params'] = self.get_inputs(results)\n        return results\n\n\n@PIPELINES.register_module()\nclass LoadAnnotationsBEVDepth(object):\n\n\n    def __init__(self, bda_aug_conf, classes, with_2d_bbox=False, with_ego_as_agent=False, is_train=True):\n        self.bda_aug_conf = bda_aug_conf\n        self.is_train = is_train\n        self.classes = classes\n        self.with_2d_bbox = with_2d_bbox\n        self.min_size = 2.0\n        self.with_ego_as_agent = with_ego_as_agent\n\n    def sample_bda_augmentation(self, tta_config=None):\n        \"\"\"Generate bda augmentation values based on bda_config.\"\"\"\n        if self.is_train:\n            rotate_bda = np.random.uniform(*self.bda_aug_conf['rot_lim'])\n            scale_bda = np.random.uniform(*self.bda_aug_conf['scale_lim'])\n            flip_dx = np.random.uniform() < self.bda_aug_conf['flip_dx_ratio']\n            flip_dy = np.random.uniform() < self.bda_aug_conf['flip_dy_ratio']\n            translation_std = self.bda_aug_conf.get('tran_lim', [0.0, 0.0, 0.0])\n            tran_bda = np.random.normal(scale=translation_std, size=3).T\n        else:\n            rotate_bda = 0\n            scale_bda = 1.0\n            if tta_config is not None:\n                flip_dx = tta_config['flip_dx']\n                flip_dy = tta_config['flip_dy']\n            else:\n                flip_dx = False\n                flip_dy = False\n            tran_bda = np.zeros((1, 3), dtype=np.float32)\n        return rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda\n\n\n    def bev_transform(self, gt_boxes, rotate_angle, scale_ratio, flip_dx,\n                      flip_dy):\n        rotate_angle = torch.tensor(rotate_angle / 180 * np.pi)\n        rot_sin = torch.sin(rotate_angle)\n        rot_cos = torch.cos(rotate_angle)\n        rot_mat = torch.Tensor([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0],\n                                [0, 0, 1]])\n        scale_mat = torch.Tensor([[scale_ratio, 0, 0], [0, scale_ratio, 0],\n                                  [0, 0, scale_ratio]])\n        flip_mat = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])\n        if flip_dx:\n            flip_mat = flip_mat @ torch.Tensor([[-1, 0, 0], [0, 1, 0],\n                                                [0, 0, 1]])\n        if flip_dy:\n            flip_mat = flip_mat @ torch.Tensor([[1, 0, 0], [0, -1, 0],\n                                                [0, 0, 1]])\n        rot_mat = flip_mat @ (scale_mat @ rot_mat)\n        if gt_boxes.shape[0] > 0:\n            gt_boxes[:, :3] = (\n                rot_mat @ gt_boxes[:, :3].unsqueeze(-1)).squeeze(-1)\n            gt_boxes[:, 3:6] *= scale_ratio\n            gt_boxes[:, 6] += rotate_angle\n            if flip_dx:\n                gt_boxes[:,\n                         6] = 2 * torch.asin(torch.tensor(1.0)) - gt_boxes[:,\n                                                                           6]\n            if flip_dy:\n                gt_boxes[:, 6] = -gt_boxes[:, 6]\n            gt_boxes[:, 7:] = (\n                rot_mat[:2, :2] @ gt_boxes[:, 7:].unsqueeze(-1)).squeeze(-1)\n        return gt_boxes, rot_mat\n\n    def _bboxes_transform(self, bboxes, centers2d, gt_labels, depths, resize, crop, flip, fH, fW):\n        assert len(bboxes) == len(centers2d) == len(gt_labels) == len(depths)\n        bboxes = bboxes * resize\n        bboxes[:, 0] = 
bboxes[:, 0] - crop[0]\n        bboxes[:, 1] = bboxes[:, 1] - crop[1]\n        bboxes[:, 2] = bboxes[:, 2] - crop[0]\n        bboxes[:, 3] = bboxes[:, 3] - crop[1]\n        bboxes[:, 0] = np.clip(bboxes[:, 0], 0, fW)\n        bboxes[:, 2] = np.clip(bboxes[:, 2], 0, fW)\n        bboxes[:, 1] = np.clip(bboxes[:, 1], 0, fH) \n        bboxes[:, 3] = np.clip(bboxes[:, 3], 0, fH)\n        keep = ((bboxes[:, 2] - bboxes[:, 0]) >= self.min_size) & ((bboxes[:, 3] - bboxes[:, 1]) >= self.min_size)\n\n\n        if flip:\n            x0 = bboxes[:, 0].copy()\n            x1 = bboxes[:, 2].copy()\n            bboxes[:, 2] = fW - x0\n            bboxes[:, 0] = fW - x1\n        bboxes = bboxes[keep]\n\n        centers2d  = centers2d * resize\n        centers2d[:, 0] = centers2d[:, 0] - crop[0]\n        centers2d[:, 1] = centers2d[:, 1] - crop[1]\n        centers2d[:, 0] = np.clip(centers2d[:, 0], 0, fW)\n        centers2d[:, 1] = np.clip(centers2d[:, 1], 0, fH) \n        if flip:\n            centers2d[:, 0] = fW - centers2d[:, 0]\n\n        centers2d = centers2d[keep]\n        gt_labels = gt_labels[keep]\n        depths = depths[keep]\n\n        return bboxes, centers2d, gt_labels, depths\n\n\n    def _filter_invisible(self, bboxes, centers2d, gt_labels, depths, fH, fW ):\n        # filter invisible 2d bboxes\n        assert len(bboxes) == len(centers2d) == len(gt_labels) == len(depths)\n\n        indices_maps = np.zeros((fH,fW))\n        tmp_bboxes = np.zeros_like(bboxes)\n        tmp_bboxes[:, :2] = np.ceil(bboxes[:, :2])\n        tmp_bboxes[:, 2:] = np.floor(bboxes[:, 2:])\n        tmp_bboxes = tmp_bboxes.astype(np.int64)\n        sort_idx = np.argsort(-depths, axis=0, kind='stable')\n        tmp_bboxes = tmp_bboxes[sort_idx]\n        bboxes = bboxes[sort_idx]\n        depths = depths[sort_idx]\n        centers2d = centers2d[sort_idx]\n        gt_labels = gt_labels[sort_idx]\n        for i in range(bboxes.shape[0]):\n            u1, v1, u2, v2 = tmp_bboxes[i]\n            indices_maps[v1:v2, u1:u2] = i\n        indices_res = np.unique(indices_maps).astype(np.int64)\n        bboxes = bboxes[indices_res]\n        depths = depths[indices_res]\n        centers2d = centers2d[indices_res]\n        gt_labels = gt_labels[indices_res]\n\n        return bboxes, centers2d, gt_labels, depths\n\n    def __call__(self, results):\n        gt_boxes, gt_labels = results['ann_infos']['gt_boxes_3d'], results['ann_infos']['gt_labels_3d']\n\n        if self.with_ego_as_agent:\n            ego_xyz = np.array([0, 0, 0])\n            ego_wlh = np.array([4.084, 1.85, 1.8])\n            ego_yaw = np.array([0])\n            ego_vel = results['curr']['gt_ego_lcf_feat'][:2]\n            ego_box = np.concatenate([ego_xyz, ego_wlh, ego_yaw, ego_vel])\n            gt_boxes =  [ego_box] + gt_boxes\n            gt_labels = [0] + gt_labels\n\n            if 'instance_inds' in results.keys():\n                results['instance_inds'] = np.concatenate([[1e7], results['instance_inds']])\n\n        if self.with_2d_bbox:\n            # gt_boxes_2d, gt_labels_2d = results['ann_infos']['gt_boxes_2d'], results['ann_infos']['gt_labels_2d']\n            # gt_centers2d, gt_depth2d = results['ann_infos']['centers2d'], results['ann_infos']['depths']\n            new_gt_bboxes = []\n            new_centers2d = []\n            new_gt_labels = []\n            new_depths = []\n            fH, fW = results['input_size']\n            for cam in results['cam_names']:\n                camera_types_2d = [\n                    'CAM_FRONT',\n                 
   'CAM_FRONT_RIGHT',\n                    'CAM_FRONT_LEFT',\n                    'CAM_BACK',\n                    'CAM_BACK_LEFT',\n                    'CAM_BACK_RIGHT',\n                ]\n                i = camera_types_2d.index(cam)\n                resize, resize_dims, crop, flip, rotate = results['img_augs'][cam]\n                gt_bboxes_2d = results['ann_infos']['gt_boxes_2d'][i]\n                centers2d = results['ann_infos']['centers2d'][i]\n                gt_labels_2d = results['ann_infos']['gt_labels_2d'][i]\n                depths = results['ann_infos']['depths'][i]\n                if len(gt_bboxes_2d) != 0:\n                    gt_bboxes_2d, centers2d, gt_labels_2d, depths = self._bboxes_transform(\n                        gt_bboxes_2d, \n                        centers2d,\n                        gt_labels_2d,\n                        depths,\n                        resize=resize,\n                        crop=crop,\n                        flip=flip,\n                        fH=fH,\n                        fW=fW,\n                    )\n                if len(gt_bboxes_2d) != 0:\n                    gt_bboxes_2d, centers2d, gt_labels_2d, depths =  self._filter_invisible(gt_bboxes_2d, centers2d, gt_labels_2d, depths, fH, fW)\n\n                new_gt_bboxes.append(to_tensor(gt_bboxes_2d))\n                new_centers2d.append(to_tensor(centers2d))\n                new_gt_labels.append(to_tensor(gt_labels_2d))\n                new_depths.append(to_tensor(depths))\n\n            results['gt_bboxes_2d'] = new_gt_bboxes\n            results['centers2d'] = new_centers2d\n            results['gt_labels_2d'] = new_gt_labels\n            results['depths2d'] = new_depths\n\n        gt_boxes, gt_labels = torch.Tensor(np.array(gt_boxes)), torch.tensor(np.array(gt_labels))\n        tta_confg = results.get('tta_config', None)\n\n        rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda = self.sample_bda_augmentation(tta_confg)\n\n        bda_mat = torch.zeros(4, 4)\n        bda_mat[3, 3] = 1\n        gt_boxes, bda_rot = self.bev_transform(gt_boxes, rotate_bda, scale_bda,\n                                               flip_dx, flip_dy)\n        if 'points' in results:\n            points = results['points'].tensor\n            points_aug = (bda_rot @ points[:, :3].unsqueeze(-1)).squeeze(-1)\n            points[:, :3] = points_aug + tran_bda\n            points = results['points'].new_point(points)\n            results['points'] = points\n        \n        bda_mat[:3, :3] = bda_rot\n        if len(gt_boxes) == 0:\n            gt_boxes = torch.zeros(0, 9)\n        results['gt_bboxes_3d'] = \\\n            LiDARInstance3DBoxes(gt_boxes, box_dim=gt_boxes.shape[-1],\n                                 origin=(0.5, 0.5, 0.5))\n        results['gt_labels_3d'] = gt_labels\n        imgs, rots, trans, intrins = results['img_inputs'][:4]\n        post_rots, post_trans = results['img_inputs'][4:]\n        results['img_inputs'] = (imgs, rots, trans, intrins, post_rots,\n                                 post_trans, bda_rot)\n        \n        results['flip_dx'] = flip_dx\n        results['flip_dy'] = flip_dy\n        results['rotate_bda'] = rotate_bda\n        results['scale_bda'] = scale_bda\n        results['bda_mat'] = bda_mat\n        if 'ego_pose' in results:\n            results['ori_ego_pose'] = results['ego_pose'].clone()\n            results['ego_pose'] =  results['ego_pose'] @ torch.inverse(bda_mat)\n            results['ego_pose_inv'] = bda_mat @ results['ego_pose_inv']\n        return 
results\n\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/test_time_aug.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom copy import deepcopy\n\nimport mmcv\n\nfrom ..builder import PIPELINES\nfrom .compose import Compose\nfrom mmcv.runner import get_dist_info\n\n@PIPELINES.register_module()\nclass MultiScaleFlipAug:\n    \"\"\"Test-time augmentation with multiple scales and flipping. An example\n    configuration is as followed:\n\n    .. code-block::\n        img_scale=[(1333, 400), (1333, 800)],\n        flip=True,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ]\n    After MultiScaleFLipAug with above configuration, the results are wrapped\n    into lists of the same length as followed:\n    .. code-block::\n        dict(\n            img=[...],\n            img_shape=[...],\n            scale=[(1333, 400), (1333, 400), (1333, 800), (1333, 800)]\n            flip=[False, True, False, True]\n            ...\n        )\n    Args:\n        transforms (list[dict]): Transforms to apply in each augmentation.\n        img_scale (tuple | list[tuple] | None): Images scales for resizing.\n        scale_factor (float | list[float] | None): Scale factors for resizing.\n        flip (bool): Whether apply flip augmentation. Default: False.\n        flip_direction (str | list[str]): Flip augmentation directions,\n            options are \"horizontal\", \"vertical\" and \"diagonal\". If\n            flip_direction is a list, multiple flip augmentations will be\n            applied. It has no effect when flip == False. Default:\n            \"horizontal\".\n    \"\"\"\n\n    def __init__(self,\n                 transforms,\n                 img_scale=None,\n                 scale_factor=None,\n                 flip=False,\n                 flip_direction='horizontal'):\n        self.transforms = Compose(transforms)\n        assert (img_scale is None) ^ (scale_factor is None), (\n            'Must have but only one variable can be set')\n        if img_scale is not None:\n            self.img_scale = img_scale if isinstance(img_scale,\n                                                     list) else [img_scale]\n            self.scale_key = 'scale'\n            assert mmcv.is_list_of(self.img_scale, tuple)\n        else:\n            self.img_scale = scale_factor if isinstance(\n                scale_factor, list) else [scale_factor]\n            self.scale_key = 'scale_factor'\n\n        self.flip = flip\n        self.flip_direction = flip_direction if isinstance(\n            flip_direction, list) else [flip_direction]\n        assert mmcv.is_list_of(self.flip_direction, str)\n        if not self.flip and self.flip_direction != ['horizontal']:\n            warnings.warn(\n                'flip_direction has no effect when flip is set to False')\n        if (self.flip\n                and not any([t['type'] == 'RandomFlip' for t in transforms])):\n            warnings.warn(\n                'flip has no effect when RandomFlip is not in transforms')\n\n    def __call__(self, results):\n        \"\"\"Call function to apply test time augment transforms on results.\n\n        Args:\n            results (dict): Result dict contains the data to transform.\n        Returns:\n           dict[str: list]: The augmented data, where each value is wrapped\n               into 
a list.\n        \"\"\"\n\n        aug_data = []\n        flip_args = [(False, None)]\n        if self.flip:\n            flip_args += [(True, direction)\n                          for direction in self.flip_direction]\n        for scale in self.img_scale:\n            for flip, direction in flip_args:\n                _results = results.copy()\n                _results[self.scale_key] = scale\n                _results['flip'] = flip\n                _results['flip_direction'] = direction\n                data = self.transforms(_results)\n                aug_data.append(data)\n        # list of dict to dict of list\n        aug_data_dict = {key: [] for key in aug_data[0]}\n        for data in aug_data:\n            for key, val in data.items():\n                aug_data_dict[key].append(val)\n        return aug_data_dict\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(transforms={self.transforms}, '\n        repr_str += f'img_scale={self.img_scale}, flip={self.flip}, '\n        repr_str += f'flip_direction={self.flip_direction})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass MultiScaleFlipAug3D(object):\n    \"\"\"Test-time augmentation with multiple scales and flipping.\n\n    Args:\n        transforms (list[dict]): Transforms to apply in each augmentation.\n        img_scale (tuple | list[tuple]: Images scales for resizing.\n        pts_scale_ratio (float | list[float]): Points scale ratios for\n            resizing.\n        flip (bool, optional): Whether apply flip augmentation.\n            Defaults to False.\n        flip_direction (str | list[str], optional): Flip augmentation\n            directions for images, options are \"horizontal\" and \"vertical\".\n            If flip_direction is list, multiple flip augmentations will\n            be applied. It has no effect when ``flip == False``.\n            Defaults to \"horizontal\".\n        pcd_horizontal_flip (bool, optional): Whether apply horizontal\n            flip augmentation to point cloud. Defaults to True.\n            Note that it works only when 'flip' is turned on.\n        pcd_vertical_flip (bool, optional): Whether apply vertical flip\n            augmentation to point cloud. 
Defaults to True.\n            Note that it works only when 'flip' is turned on.\n    \"\"\"\n\n    def __init__(self,\n                 transforms,\n                 img_scale,\n                 pts_scale_ratio,\n                 flip=False,\n                 flip_direction='horizontal',\n                 pcd_horizontal_flip=False,\n                 pcd_vertical_flip=False):\n        self.transforms = Compose(transforms)\n        self.img_scale = img_scale if isinstance(img_scale,\n                                                 list) else [img_scale]\n        self.pts_scale_ratio = pts_scale_ratio \\\n            if isinstance(pts_scale_ratio, list) else [float(pts_scale_ratio)]\n\n        assert mmcv.is_list_of(self.img_scale, tuple)\n        assert mmcv.is_list_of(self.pts_scale_ratio, float)\n\n        self.flip = flip\n        self.pcd_horizontal_flip = pcd_horizontal_flip\n        self.pcd_vertical_flip = pcd_vertical_flip\n\n        self.flip_direction = flip_direction if isinstance(\n            flip_direction, list) else [flip_direction]\n        assert mmcv.is_list_of(self.flip_direction, str)\n        if not self.flip and self.flip_direction != ['horizontal']:\n            warnings.warn(\n                'flip_direction has no effect when flip is set to False')\n        if (self.flip and not any([(t['type'] == 'RandomFlip3D'\n                                    or t['type'] == 'RandomFlip')\n                                   for t in transforms])):\n            warnings.warn(\n                'flip has no effect when RandomFlip is not in transforms')\n\n    def __call__(self, results):\n        \"\"\"Call function to augment common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to augment.\n\n        Returns:\n            dict: The result dict contains the data that is augmented with\n                different scales and flips.\n        \"\"\"\n        aug_data = []\n\n        # modified from `flip_aug = [False, True] if self.flip else [False]`\n        # to reduce unnecessary scenes when using double flip augmentation\n        # during test time\n        flip_aug = [True] if self.flip else [False]\n        pcd_horizontal_flip_aug = [False, True] \\\n            if self.flip and self.pcd_horizontal_flip else [False]\n        pcd_vertical_flip_aug = [False, True] \\\n            if self.flip and self.pcd_vertical_flip else [False]\n        for scale in self.img_scale:\n            for pts_scale_ratio in self.pts_scale_ratio:\n                for flip in flip_aug:\n                    for pcd_horizontal_flip in pcd_horizontal_flip_aug:\n                        for pcd_vertical_flip in pcd_vertical_flip_aug:\n                            for direction in self.flip_direction:\n                                # results.copy will cause bug\n                                # since it is shallow copy\n                                _results = deepcopy(results)\n                                _results['scale'] = scale\n                                _results['flip'] = flip\n                                _results['pcd_scale_factor'] = \\\n                                    pts_scale_ratio\n                                _results['flip_direction'] = direction\n                                _results['pcd_horizontal_flip'] = \\\n                                    pcd_horizontal_flip\n                                _results['pcd_vertical_flip'] = \\\n                                    pcd_vertical_flip\n                             
   data = self.transforms(_results)\n\n                                aug_data.append(data)\n        # list of dict to dict of list\n        aug_data_dict = {key: [] for key in aug_data[0]}\n        for data in aug_data:\n            for key, val in data.items():\n                aug_data_dict[key].append(val)\n        return aug_data_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(transforms={self.transforms}, '\n        repr_str += f'img_scale={self.img_scale}, flip={self.flip}, '\n        repr_str += f'pts_scale_ratio={self.pts_scale_ratio}, '\n        repr_str += f'flip_direction={self.flip_direction})'\n        return repr_str\n\n@PIPELINES.register_module()\nclass CustomMultiScaleFlipAug3D(object):\n    \"\"\"Test-time augmentation with multiple scales and flipping.\n\n    Args:\n        transforms (list[dict]): Transforms to apply in each augmentation.\n        img_scale (tuple | list[tuple]: Images scales for resizing.\n        pts_scale_ratio (float | list[float]): Points scale ratios for\n            resizing.\n        flip (bool, optional): Whether apply flip augmentation.\n            Defaults to False.\n        flip_direction (str | list[str], optional): Flip augmentation\n            directions for images, options are \"horizontal\" and \"vertical\".\n            If flip_direction is list, multiple flip augmentations will\n            be applied. It has no effect when ``flip == False``.\n            Defaults to \"horizontal\".\n        pcd_horizontal_flip (bool, optional): Whether apply horizontal\n            flip augmentation to point cloud. Defaults to True.\n            Note that it works only when 'flip' is turned on.\n        pcd_vertical_flip (bool, optional): Whether apply vertical flip\n            augmentation to point cloud. 
Defaults to True.\n            Note that it works only when 'flip' is turned on.\n    \"\"\"\n\n    def __init__(self,\n                 transforms,\n                 tta_dx=False,\n                 tta_dy=False,\n                 tta=False):\n        self.transforms = Compose(transforms)\n        self.tta = tta\n        self.tta_dx = tta_dx\n        self.tta_dy = tta_dy\n\n    def __call__(self, results):\n        \"\"\"Call function to augment common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to augment.\n\n        Returns:\n            dict: The result dict contains the data that is augmented with\n                different scales and flips.\n        \"\"\"\n        aug_data = []\n\n        # modified from `flip_aug = [False, True] if self.flip else [False]`\n        # to reduce unnecessary scenes when using double flip augmentation\n        # during test time\n        flip_aug = [False, True] if self.tta else [False]\n        flip_dx_aug = [False, True] \\\n            if self.tta_dx else [False]\n        flip_dy_aug = [False, True] \\\n            if self.tta_dy else [False]\n        \n        for flip in flip_aug:\n            for flip_dx in flip_dx_aug:\n                for flip_dy in flip_dy_aug:\n                        # for direction in self.flip_direction:\n                        # results.copy will cause bug\n                        # since it is shallow copy\n                        tta_config = dict(\n                            tta_flip = flip,\n                            flip_dx = flip_dx,\n                            flip_dy = flip_dy,\n                        )\n                        results['tta_config'] = tta_config\n                        _results = deepcopy(results)\n\n                        data = self.transforms(_results)\n\n                        aug_data.append(data)\n        # list of dict to dict of list\n        aug_data_dict = {key: [] for key in aug_data[0]}\n        for data in aug_data:\n            for key, val in data.items():\n                aug_data_dict[key].append(val)\n        return aug_data_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        return repr_str\n\n\n\n@PIPELINES.register_module()\nclass CustomDistMultiScaleFlipAug3D(object):\n    \"\"\"Test-time augmentation with multiple scales and flipping.\n\n    Args:\n        transforms (list[dict]): Transforms to apply in each augmentation.\n        img_scale (tuple | list[tuple]: Images scales for resizing.\n        pts_scale_ratio (float | list[float]): Points scale ratios for\n            resizing.\n        flip (bool, optional): Whether apply flip augmentation.\n            Defaults to False.\n        flip_direction (str | list[str], optional): Flip augmentation\n            directions for images, options are \"horizontal\" and \"vertical\".\n            If flip_direction is list, multiple flip augmentations will\n            be applied. It has no effect when ``flip == False``.\n            Defaults to \"horizontal\".\n        pcd_horizontal_flip (bool, optional): Whether apply horizontal\n            flip augmentation to point cloud. Defaults to True.\n            Note that it works only when 'flip' is turned on.\n        pcd_vertical_flip (bool, optional): Whether apply vertical flip\n            augmentation to point cloud. 
Defaults to True.\n            Note that it works only when 'flip' is turned on.\n    \"\"\"\n\n    def __init__(self,\n                 transforms,\n                 tta=False):\n        self.transforms = Compose(transforms)\n        self.tta = tta\n\n    def __call__(self, results):\n        \"\"\"Call function to augment common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to augment.\n\n        Returns:\n            dict: The result dict contains the data that is augmented with\n                different scales and flips.\n        \"\"\"\n        _rank, _world_size = get_dist_info()\n        if self.tta: assert _world_size == 8\n        aug_data = []\n\n        # modified from `flip_aug = [False, True] if self.flip else [False]`\n        # to reduce unnecessary scenes when using double flip augmentation\n        # during test time\n        if self.tta:\n            flip_aug = [_rank & 0b100>0]\n            flip_dx_aug =  [_rank & 0b010 >0]\n            flip_dy_aug =  [_rank & 0b001 >0]\n        else: \n            flip_aug, flip_dx_aug, flip_dy_aug = [False], [False], [False]\n        for flip in flip_aug:\n            for flip_dx in flip_dx_aug:\n                for flip_dy in flip_dy_aug:\n                        # for direction in self.flip_direction:\n                        # results.copy will cause bug\n                        # since it is shallow copy\n                        tta_config = dict(\n                            tta_flip = flip,\n                            flip_dx = flip_dx,\n                            flip_dy = flip_dy,\n                            dist_tta = self.tta,\n                        )\n                        results['tta_config'] = tta_config\n                        _results = deepcopy(results)\n                        data = self.transforms(_results)\n                        aug_data.append(data)\n\n        # list of dict to dict of list\n        aug_data_dict = {key: [] for key in aug_data[0]}\n        for data in aug_data:\n            for key, val in data.items():\n                aug_data_dict[key].append(val)\n        return aug_data_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        return repr_str"
  },
  {
    "path": "mmdet3d/datasets/pipelines/transforms_3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport random\nimport warnings\n\nimport cv2\nimport numpy as np\nfrom mmcv import is_tuple_of\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet3d.core import VoxelGenerator\nfrom mmdet3d.core.bbox import (CameraInstance3DBoxes, DepthInstance3DBoxes,\n                               LiDARInstance3DBoxes, box_np_ops)\nfrom mmdet3d.datasets.pipelines.compose import Compose\nfrom mmdet.datasets.pipelines import RandomCrop, RandomFlip, Rotate\nfrom ..builder import OBJECTSAMPLERS, PIPELINES\nfrom .data_augment_utils import noise_per_object_v3_\nimport mmcv\nfrom copy import deepcopy\nfrom pyquaternion import Quaternion\nimport torch\nfrom PIL import Image\nimport time\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom mmdet3d.models.fbbev.utils.draw_bbox import show_multi_modality_result\n\n@PIPELINES.register_module()\nclass VisualInputsAndGT(object):\n    \"\"\"\n    show images and gt.\n    \"\"\"\n    def __init__(self, max=20):\n        self.max = max\n        self.i = 0\n    def _draw_point_cloud_(self, point_cloud, img_size):\n       \n        max_range =  np.array([50 , 50 ,  5.])\n        min_range =  np.array([-50 , -50 ,  -3.])\n        point_cloud[:, :3] = (point_cloud[:, :3] - min_range) / (max_range - min_range)\n\n       \n        max_intensity = np.max(point_cloud[:, 3])\n        min_intensity = np.min(point_cloud[:, 3])\n        point_cloud[:, 3] = point_cloud[:, 3 ]/255\n        point_cloud[:, 3] = (point_cloud[:, 3])# .astype(np.uint8)\n\n       \n        img = np.zeros((img_size[0], img_size[1], 3), np.uint8)\n        jet = plt.get_cmap('jet')\n        for i in range(point_cloud.shape[0]):\n            color = jet(point_cloud[i, 3])\n\n            color = (int(color[0]*255), int(color[1]*255), int(color[2]*255))\n            x = int(point_cloud[i, 0] * img_size[1])\n            y = int(point_cloud[i, 1] * img_size[0])\n            try:\n                cv2.circle(img, (x, y), 1, color, -1)\n            except:\n                pass\n\n        return img\n    \n    def world2bev_vis(self, x, y):\n            return int((x + 51.2) * 15), int((y + 51.2) * 15)\n    \n    def world2bev_vis2(self, x, y):\n            return int((x) * 1536), int((y) * 1536)\n    \n\n    def __visual__(self, results):\n        \n        import bbox_visualizer as bbv\n\n\n        _, _, h, w = results['img_inputs'][0].shape\n        imgs = results['img_inputs'][0].reshape(1, 6, 3, h, w)\n        imgs = imgs[0, :]\n        for i in range(6):\n            # tmp = bbv.draw_rectangle(imgs[i].permute(1, 2, 0).cpu().numpy(), results['gt_bboxes_2d'][i][0], bbox_color=[255, 0,0])\n            tmp = bbv.draw_multiple_rectangles(imgs[i].permute(1, 2, 0).cpu().numpy(), results['gt_bboxes_2d'][i].numpy().astype(np.int), bbox_color=[255, 0,0])\n            mmcv.imwrite(tmp[:,:,::-1], f'tmp_{i}.png')\n        for i in range(6):\n            # print(results['bbox3d_fields'])\n            # print(results['gt_bboxes_3d'].tensor)\n            # print(results['lidar2img'][i]) \n            \n            show_multi_modality_result(\n                imgs[i].permute(1, 2, 0).cpu().numpy(),\n                results['gt_bboxes_3d'],\n                None,\n                None,\n                '.',\n                f'aug_{i}.png',\n                camera_params=results['img_inputs'][1:] + (i,),\n                box_mode='lidar',\n                show=True,\n                scores=None,\n            )\n\n        bev_img = np.ones([1536, 1536, 3], 
dtype=np.float32) * 255\n        point_cloud = results['points'].tensor[:, :4].cpu().numpy().copy()\n        img_size = (1536, 1536)\n  \n        bev_img = self._draw_point_cloud_(point_cloud, img_size)\n        bev_img = bev_img.astype(np.float32)\n        mmcv.imwrite(bev_img, f'aug_bev_{results[\"index\"]}_lidar.png')\n    \n        for i, corners in enumerate(results['gt_bboxes_3d'].corners[:, [4, 7, 3, 0], :2]):\n            corners = np.array([self.world2bev_vis(*corner) for corner in corners])\n        \n            # _img = np.zeros([1536, 1536, 3], dtype=np.float32)\n            bev_img = cv2.circle(bev_img, corners[0], 5, (61, 102, 255))\n            bev_img = cv2.fillPoly(bev_img, [corners], (61, 102, 255))\n            # bev_img = cv2.addWeighted(bev_img, 1, _img, 0.5, 0)\n            # cv2.putText(bev_img, '%.1f, %.1f, %.1f' % (\n            #     results['gt_bboxes_3d'].tensor[i][0],\n            #     results['gt_bboxes_3d'].tensor[i][1],\n            #     results['gt_bboxes_3d'].tensor[i][6]),  corners[1], cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 2)\n\n        bev_img = cv2.circle(bev_img, self.world2bev_vis(0, 0), 5, (0, 255, 0), thickness=-1)\n        \n        if 'map_gt_bboxes_3d' in results:\n\n            if type(results['map_gt_bboxes_3d'].data) == torch.Tensor:\n                lines = results['map_gt_bboxes_3d'].data[:, 0, :, :]\n                world2bev_vis = self.world2bev_vis2\n            else:\n                lines = results['map_gt_bboxes_3d'].data.fixed_num_sampled_points\n                world2bev_vis = self.world2bev_vis\n            for k, line in enumerate(lines):\n                label = results['map_gt_labels_3d'].data[k]\n                line = line.cpu().numpy()\n                corners = np.array([world2bev_vis(*corner) for corner in line])\n                corners = [each for each in corners if ((each>=0).all() & (each<1536).all())]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 255))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=colors[label], thickness=1)\n\n        # mmcv.imwrite(img, 'point bev.png')\n        if 'gt_ego_fut_trajs' in results:\n            gt_ego_fut_trajs =  results['gt_ego_fut_trajs'] # self._render_traj(results['gt_ego_fut_trajs'], 2)\n            points = np.array([self.world2bev_vis(*point.numpy()) for point in gt_ego_fut_trajs])\n            for point in points:\n                bev_img = cv2.circle(bev_img, point, 1, (0, 255, 0))\n\n        if 'gt_agent_fut_traj' in results:\n            gt_agent_fut_traj = results['gt_agent_fut_traj']\n            gt_agent_fut_traj_mask = results['gt_agent_fut_traj_mask']\n            centers = results['gt_bboxes_3d'].center[..., :2]\n            tmp = torch.cat([centers[:, None], gt_agent_fut_traj], 1)\n            trajs = torch.cumsum(tmp, 1)[:, 1:]\n            for k, traj in enumerate(trajs):\n                \n                traj = traj.cpu().numpy()\n\n                corners = np.array([self.world2bev_vis(*corner) for corner in traj])\n                center = np.array(self.world2bev_vis(*centers[k]))\n                corners = [each for each in corners if ((each>=0).all() & (each<1536).all())]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    if gt_agent_fut_traj_mask[k, i+1].sum()<2 or 
gt_agent_fut_traj_mask[k, i].sum()<2:\n                        continue\n                    if i == 0: bev_img = cv2.line(bev_img, center, corners[i], color=(123, 22, 187), thickness=1)\n                    bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 32))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=(123, 22, 187), thickness=1)\n\n\n        if 'fut_boxes_in_cur_ego_list' in results:\n            for k, fut_boxes in enumerate(results['fut_boxes_in_cur_ego_list']):\n\n                fut_bev_img = np.ones([1536, 1536, 3], dtype=np.float32) * 255\n                fut_bev_img = fut_bev_img.astype(np.float32)\n       \n                try:\n                    for i, corners in enumerate(fut_boxes.corners[:, [4, 7, 3, 0], :2]):\n                        corners = np.array([self.world2bev_vis(*corner) for corner in corners])\n                        fut_bev_img = cv2.circle(fut_bev_img, corners[0], 5, (61, 102, 255))\n                        fut_bev_img = cv2.fillPoly(fut_bev_img, [corners], (61, 102, 255))\n                except: pass\n                \n                fut_bev_img = cv2.circle(fut_bev_img, self.world2bev_vis(0, 0), 5, (0, 255, 0), thickness=-1)\n        \n                mmcv.imwrite(fut_bev_img, f'aug_bev_{results[\"index\"]}_fut_{k}.png',)\n        mmcv.imwrite(bev_img, f'bev_{results[\"index\"]}.png',)\n        print('saved', f'bev_{results[\"index\"]}.png') \n        from IPython import embed\n        embed()\n        exit()\n\n\n    def _render_traj(self, future_traj, points_per_step=10):\n        total_steps = (len(future_traj)-1) * points_per_step + 1\n        total_xy = torch.zeros((total_steps, 2), device=future_traj.device)\n        for i in range(total_steps-1):\n            unit_vec = future_traj[i//points_per_step +\n                                   1] - future_traj[i//points_per_step]\n            total_xy[i] = (i/points_per_step - i//points_per_step) * \\\n                unit_vec + future_traj[i//points_per_step]\n        total_xy[-1] = future_traj[-1]\n        return total_xy\n\n    def __call__(self, results):\n\n        self.__visual__(results)\n        return results\n\n\n\n\n\n@PIPELINES.register_module()\nclass GridMask:\n    def __init__(\n        self,\n        use_h=True,\n        use_w=True,\n        rotate=1,\n        offset=False,\n        ratio=0.5,\n        mode=1,\n    ):\n        self.use_h = use_h\n        self.use_w = use_w\n        self.rotate = rotate\n        self.offset = offset\n        self.ratio = ratio\n        self.mode = mode\n        self.epoch = None\n\n\n    def __call__(self, results):\n\n        imgs = results[\"img_inputs\"][0]\n        h = imgs[0].shape[1]\n        w = imgs[0].shape[2]\n        self.d1 = 2\n        self.d2 = min(h, w)\n        hh = int(1.5 * h)\n        ww = int(1.5 * w)\n        d = np.random.randint(self.d1, self.d2)\n        if self.ratio == 1:\n            self.l = np.random.randint(1, d)\n        else:\n            self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)\n        mask = np.ones((hh, ww), np.float32)\n        st_h = np.random.randint(d)\n        st_w = np.random.randint(d)\n        if self.use_h:\n            for i in range(hh // d):\n                s = d * i + st_h\n                t = min(s + self.l, hh)\n                mask[s:t, :] *= 0\n        if self.use_w:\n            for i in range(ww // d):\n                s = d * i + st_w\n                t = min(s + self.l, ww)\n                mask[:, s:t] *= 0\n\n        r = 
np.random.randint(self.rotate)\n        mask = Image.fromarray(np.uint8(mask))\n        mask = mask.rotate(r)\n        mask = np.asarray(mask)\n        mask = mask[\n            (hh - h) // 2 : (hh - h) // 2 + h, (ww - w) // 2 : (ww - w) // 2 + w\n        ]\n\n        mask = mask.astype(np.float32)\n        mask = mask[None, :, :]\n        if self.mode == 1:\n            mask = 1 - mask\n\n        # mask = mask.expand_as(imgs[0])\n        if self.offset:\n            offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).float()\n            offset = (1 - mask) * offset\n            imgs = imgs * mask[None] + offset\n            # imgs = [x * mask + offset for x in imgs]\n        else:\n            imgs = imgs * mask[None]\n            # imgs = torch.tensor[x * mask for x in imgs]\n        results[\"img_inputs\"] = (imgs,) + results[\"img_inputs\"][1:]\n        # results.update(img=imgs)\n        return results\n\n\n@PIPELINES.register_module()\nclass AugPoints(object):\n    def __call__(self, results):\n\n\n        if results['rotate_bda'] != 0:\n            results['points'].rotate(results['rotate_bda']/180 * np.pi)\n        if results['scale_bda'] != 1:\n            results['points'].scale(results['scale_bda'])\n\n        if results['flip_dx']:\n            results['points'].flip('vertical')\n        if results['flip_dy']:\n            results['points'].flip('horizontal')\n\n\n        return results\n\n\n\n@PIPELINES.register_module()\nclass ToEgo(object):\n    def __init__(self, ego_cam='CAM_FRONT',):\n        self.ego_cam=ego_cam\n\n    def __call__(self, results):\n        lidar2lidarego = np.eye(4, dtype=np.float32)\n        lidar2lidarego[:3, :3] = Quaternion(\n            results['curr']['lidar2ego_rotation']).rotation_matrix\n        lidar2lidarego[:3, 3] = results['curr']['lidar2ego_translation']\n\n        lidarego2global = np.eye(4, dtype=np.float32)\n        lidarego2global[:3, :3] = Quaternion(\n            results['curr']['ego2global_rotation']).rotation_matrix\n        lidarego2global[:3, 3] = results['curr']['ego2global_translation']\n\n        camego2global = np.eye(4, dtype=np.float32)\n        camego2global[:3, :3] = Quaternion(\n            results['curr']['cams'][self.ego_cam]\n            ['ego2global_rotation']).rotation_matrix\n        camego2global[:3, 3] = results['curr']['cams'][self.ego_cam][\n            'ego2global_translation']\n        lidar2camego = np.linalg.inv(camego2global) @ lidarego2global @ lidar2lidarego\n\n        points = results['points'].tensor.numpy()\n        points_ego = lidar2camego[:3,:3].reshape(1, 3, 3) @ \\\n                     points[:, :3].reshape(-1, 3, 1) + \\\n                     lidar2camego[:3, 3].reshape(1, 3, 1)\n        points[:, :3] = points_ego.squeeze(-1)\n        points = results['points'].new_point(points)\n        results['points'] = points\n        return results\n\n\n\n@PIPELINES.register_module()\nclass PadMultiViewImage(object):\n    \"\"\"Pad the multi-view image.\n    There are two padding modes: (1) pad to a fixed size and (2) pad to the\n    minimum size that is divisible by some number.\n    Added keys are \"pad_shape\", \"pad_fixed_size\", \"pad_size_divisor\",\n    Args:\n        size (tuple, optional): Fixed padding size.\n        size_divisor (int, optional): The divisor of padded size.\n        pad_val (float, optional): Padding value, 0 by default.\n    \"\"\"\n\n    def __init__(self, size=None, size_divisor=None, pad_val=0):\n        self.size = size\n        self.size_divisor = size_divisor\n    
    self.pad_val = pad_val\n        # only one of size and size_divisor should be valid\n        # assert size is not None or size_divisor is not None\n        # assert size is None or size_divisor is None\n\n    def _pad_img(self, results):\n        \"\"\"Pad images according to ``self.size``.\"\"\"\n        if self.size == 'same2max':\n            max_shape = (max([img.shape[0] for img in results['img']]), max([img.shape[1] for img in results['img']]))\n            divisor = self.size_divisor\n            pad_h = int(np.ceil(max_shape[0] / divisor)) * divisor\n            pad_w = int(np.ceil(max_shape[1] / divisor)) * divisor\n            padded_img = [mmcv.impad(\n                img, shape=(pad_h, pad_w), pad_val=self.pad_val) for img in results['img']]\n        elif self.size is not None:\n            padded_img = [mmcv.impad(\n                img, shape=self.size, pad_val=self.pad_val) for img in results['img']]\n        elif self.size_divisor is not None:\n            padded_img = [mmcv.impad_to_multiple(\n                img, self.size_divisor, pad_val=self.pad_val) for img in results['img']]\n        \n        results['ori_shape'] = [img.shape for img in results['img']]\n        results['img'] = padded_img\n        results['img_shape'] = [img.shape for img in padded_img]\n        results['pad_shape'] = [img.shape for img in padded_img]\n        results['pad_fixed_size'] = self.size\n        results['pad_size_divisor'] = self.size_divisor\n\n   \n    def __call__(self, results):\n        \"\"\"Call function to pad images, masks, semantic segmentation maps.\n        Args:\n            results (dict): Result dict from loading pipeline.\n        Returns:\n            dict: Updated result dict.\n        \"\"\"\n       \n        self._pad_img(results)\n         \n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(size={self.size}, '\n        repr_str += f'size_divisor={self.size_divisor}, '\n        repr_str += f'pad_val={self.pad_val})'\n        return repr_str\n\n\n\n@PIPELINES.register_module()\nclass RandomDropPointsColor(object):\n    r\"\"\"Randomly set the color of points to all zeros.\n\n    Once this transform is executed, all the points' color will be dropped.\n    Refer to `PAConv <https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/\n    util/transform.py#L223>`_ for more details.\n\n    Args:\n        drop_ratio (float, optional): The probability of dropping point colors.\n            Defaults to 0.2.\n    \"\"\"\n\n    def __init__(self, drop_ratio=0.2):\n        assert isinstance(drop_ratio, (int, float)) and 0 <= drop_ratio <= 1, \\\n            f'invalid drop_ratio value {drop_ratio}'\n        self.drop_ratio = drop_ratio\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to drop point colors.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after color dropping,\n                'points' key is updated in the result dict.\n        \"\"\"\n        points = input_dict['points']\n        assert points.attribute_dims is not None and \\\n            'color' in points.attribute_dims, \\\n            'Expect points have color attribute'\n\n        # this if-expression is a bit strange\n        # `RandomDropPointsColor` is used in training 3D segmentor PAConv\n        # we discovered in our experiments that, using\n        # `if np.random.rand() > 1.0 - self.drop_ratio` consistently leads to\n        # better 
results than using `if np.random.rand() < self.drop_ratio`\n        # so we keep this hack in our codebase\n        if np.random.rand() > 1.0 - self.drop_ratio:\n            points.color = points.color * 0.0\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(drop_ratio={self.drop_ratio})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass RandomFlip3D(RandomFlip):\n    \"\"\"Flip the points & bbox.\n\n    If the input dict contains the key \"flip\", then the flag will be used,\n    otherwise it will be randomly decided by a ratio specified in the init\n    method.\n\n    Args:\n        sync_2d (bool, optional): Whether to apply flip according to the 2D\n            images. If True, it will apply the same flip as that to 2D images.\n            If False, it will decide whether to flip randomly and independently\n            to that of 2D images. Defaults to True.\n        flip_ratio_bev_horizontal (float, optional): The flipping probability\n            in horizontal direction. Defaults to 0.0.\n        flip_ratio_bev_vertical (float, optional): The flipping probability\n            in vertical direction. Defaults to 0.0.\n    \"\"\"\n\n    def __init__(self,\n                 sync_2d=True,\n                 flip_ratio_bev_horizontal=0.0,\n                 flip_ratio_bev_vertical=0.0,\n                 **kwargs):\n        super(RandomFlip3D, self).__init__(\n            flip_ratio=flip_ratio_bev_horizontal, **kwargs)\n        self.sync_2d = sync_2d\n        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical\n        if flip_ratio_bev_horizontal is not None:\n            assert isinstance(\n                flip_ratio_bev_horizontal,\n                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1\n        if flip_ratio_bev_vertical is not None:\n            assert isinstance(\n                flip_ratio_bev_vertical,\n                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1\n\n    def random_flip_data_3d(self, input_dict, direction='horizontal'):\n        \"\"\"Flip 3D data randomly.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n            direction (str, optional): Flip direction.\n                Default: 'horizontal'.\n\n        Returns:\n            dict: Flipped results, 'points', 'bbox3d_fields' keys are\n                updated in the result dict.\n        \"\"\"\n        assert direction in ['horizontal', 'vertical']\n        # for semantic segmentation task, only points will be flipped.\n        if 'bbox3d_fields' not in input_dict:\n            input_dict['points'].flip(direction)\n            return\n        if len(input_dict['bbox3d_fields']) == 0:  # test mode\n            input_dict['bbox3d_fields'].append('empty_box3d')\n            input_dict['empty_box3d'] = input_dict['box_type_3d'](\n                np.array([], dtype=np.float32))\n        assert len(input_dict['bbox3d_fields']) == 1\n        for key in input_dict['bbox3d_fields']:\n            if 'points' in input_dict:\n                input_dict['points'] = input_dict[key].flip(\n                    direction, points=input_dict['points'])\n            else:\n                input_dict[key].flip(direction)\n        if 'centers2d' in input_dict:\n            assert self.sync_2d is True and direction == 'horizontal', \\\n                'Only support sync_2d=True and horizontal flip with images'\n            w 
= input_dict['ori_shape'][1]\n            input_dict['centers2d'][..., 0] = \\\n                w - input_dict['centers2d'][..., 0]\n            # need to modify the horizontal position of camera center\n            # along u-axis in the image (flip like centers2d)\n            # ['cam2img'][0][2] = c_u\n            # see more details and examples at\n            # https://github.com/open-mmlab/mmdetection3d/pull/744\n            input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2]\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to flip points, values in the ``bbox3d_fields`` and\n        also flip 2D image and its annotations.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Flipped results, 'flip', 'flip_direction',\n                'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added\n                into result dict.\n        \"\"\"\n        # flip 2D image and its annotations\n        super(RandomFlip3D, self).__call__(input_dict)\n\n        if self.sync_2d:\n            input_dict['pcd_horizontal_flip'] = input_dict['flip']\n            input_dict['pcd_vertical_flip'] = False\n        else:\n            if 'pcd_horizontal_flip' not in input_dict:\n                flip_horizontal = True if np.random.rand(\n                ) < self.flip_ratio else False\n                input_dict['pcd_horizontal_flip'] = flip_horizontal\n            if 'pcd_vertical_flip' not in input_dict:\n                flip_vertical = True if np.random.rand(\n                ) < self.flip_ratio_bev_vertical else False\n                input_dict['pcd_vertical_flip'] = flip_vertical\n\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        if input_dict['pcd_horizontal_flip']:\n            self.random_flip_data_3d(input_dict, 'horizontal')\n            input_dict['transformation_3d_flow'].extend(['HF'])\n        if input_dict['pcd_vertical_flip']:\n            self.random_flip_data_3d(input_dict, 'vertical')\n            input_dict['transformation_3d_flow'].extend(['VF'])\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(sync_2d={self.sync_2d},'\n        repr_str += f' flip_ratio_bev_vertical={self.flip_ratio_bev_vertical})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass MultiViewWrapper(object):\n    \"\"\"Wrap transformation from single-view into multi-view.\n\n    The wrapper processes the images from multi-view one by one. For each\n    image, it constructs a pseudo dict according to the keys specified by the\n    'process_fields' parameter. After the transformation is finished, desired\n    information can be collected by specifying the keys in the 'collected_keys'\n    parameter. Multi-view images share the same transformation parameters\n    but do not share the same magnitude when a random transformation is\n    conducted.\n\n    Args:\n        transforms (list[dict]): A list of dict specifying the transformations\n            for the monocular situation.\n        process_fields (dict): Desired keys that the transformations should\n            be conducted on. 
Default to dict(img_fields=['img']).\n        collected_keys (list[str]): Collect information in transformation\n            like rotate angles, crop roi, and flip state.\n    \"\"\"\n\n    def __init__(self,\n                 transforms,\n                 process_fields=dict(img_fields=['img']),\n                 collected_keys=[]):\n        self.transform = Compose(transforms)\n        self.collected_keys = collected_keys\n        self.process_fields = process_fields\n\n    def __call__(self, input_dict):\n        for key in self.collected_keys:\n            input_dict[key] = []\n        for img_id in range(len(input_dict['img'])):\n            process_dict = self.process_fields.copy()\n            for field in self.process_fields:\n                for key in self.process_fields[field]:\n                    process_dict[key] = input_dict[key][img_id]\n            process_dict = self.transform(process_dict)\n            for field in self.process_fields:\n                for key in self.process_fields[field]:\n                    input_dict[key][img_id] = process_dict[key]\n            for key in self.collected_keys:\n                input_dict[key].append(process_dict[key])\n        return input_dict\n\n\n@PIPELINES.register_module()\nclass RangeLimitedRandomCrop(RandomCrop):\n    \"\"\"Randomly crop image-view objects under a limitation of range.\n\n    Args:\n        relative_x_offset_range (tuple[float]): Relative range of random crop\n            in x direction. (x_min, x_max) in [0, 1.0]. Default to (0.0, 1.0).\n        relative_y_offset_range (tuple[float]): Relative range of random crop\n            in y direction. (y_min, y_max) in [0, 1.0]. Default to (0.0, 1.0).\n    \"\"\"\n\n    def __init__(self,\n                 relative_x_offset_range=(0.0, 1.0),\n                 relative_y_offset_range=(0.0, 1.0),\n                 **kwargs):\n        super(RangeLimitedRandomCrop, self).__init__(**kwargs)\n        for range in [relative_x_offset_range, relative_y_offset_range]:\n            assert 0 <= range[0] <= range[1] <= 1\n        self.relative_x_offset_range = relative_x_offset_range\n        self.relative_y_offset_range = relative_y_offset_range\n\n    def _crop_data(self, results, crop_size, allow_negative_crop):\n        \"\"\"Function to randomly crop images.\n\n        Modified from RandomCrop in mmdet==2.25.0\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n            crop_size (tuple): Expected absolute size after cropping, (h, w).\n\n        Returns:\n            dict: Randomly cropped results, 'img_shape' key in result dict is\n                updated according to crop size.\n        \"\"\"\n        assert crop_size[0] > 0 and crop_size[1] > 0\n        for key in results.get('img_fields', ['img']):\n            img = results[key]\n            margin_h = max(img.shape[0] - crop_size[0], 0)\n            margin_w = max(img.shape[1] - crop_size[1], 0)\n            offset_range_h = (margin_h * self.relative_y_offset_range[0],\n                              margin_h * self.relative_y_offset_range[1] + 1)\n            offset_h = np.random.randint(*offset_range_h)\n            offset_range_w = (margin_w * self.relative_x_offset_range[0],\n                              margin_w * self.relative_x_offset_range[1] + 1)\n            offset_w = np.random.randint(*offset_range_w)\n            crop_y1, crop_y2 = offset_h, offset_h + crop_size[0]\n            crop_x1, crop_x2 = offset_w, offset_w + crop_size[1]\n\n            # crop the image\n            
img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]\n            img_shape = img.shape\n            results[key] = img\n            results['crop'] = (crop_x1, crop_y1, crop_x2, crop_y2)\n        results['img_shape'] = img_shape\n\n        # crop bboxes accordingly and clip to the image boundary\n        for key in results.get('bbox_fields', []):\n            # e.g. gt_bboxes and gt_bboxes_ignore\n            bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h],\n                                   dtype=np.float32)\n            bboxes = results[key] - bbox_offset\n            if self.bbox_clip_border:\n                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])\n                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])\n            valid_inds = (bboxes[:, 2] > bboxes[:, 0]) & (\n                bboxes[:, 3] > bboxes[:, 1])\n            # If the crop does not contain any gt-bbox area and\n            # allow_negative_crop is False, skip this image.\n            if (key == 'gt_bboxes' and not valid_inds.any()\n                    and not allow_negative_crop):\n                return None\n            results[key] = bboxes[valid_inds, :]\n            # label fields. e.g. gt_labels and gt_labels_ignore\n            label_key = self.bbox2label.get(key)\n            if label_key in results:\n                results[label_key] = results[label_key][valid_inds]\n\n            # mask fields, e.g. gt_masks and gt_masks_ignore\n            mask_key = self.bbox2mask.get(key)\n            if mask_key in results:\n                results[mask_key] = results[mask_key][\n                    valid_inds.nonzero()[0]].crop(\n                        np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))\n                if self.recompute_bbox:\n                    results[key] = results[mask_key].get_bboxes()\n\n        # crop semantic seg\n        for key in results.get('seg_fields', []):\n            results[key] = results[key][crop_y1:crop_y2, crop_x1:crop_x2]\n\n        return results\n\n\n@PIPELINES.register_module()\nclass RandomRotate(Rotate):\n    \"\"\"Randomly rotate images.\n\n    The ratation angle is selected uniformly within the interval specified by\n    the 'range'  parameter.\n\n    Args:\n        range (tuple[float]): Define the range of random rotation.\n            (angle_min, angle_max) in angle.\n    \"\"\"\n\n    def __init__(self, range, **kwargs):\n        super(RandomRotate, self).__init__(**kwargs)\n        self.range = range\n\n    def __call__(self, results):\n        self.angle = np.random.uniform(self.range[0], self.range[1])\n        super(RandomRotate, self).__call__(results)\n        results['rotate'] = self.angle\n        return results\n\n\n@PIPELINES.register_module()\nclass RandomJitterPoints(object):\n    \"\"\"Randomly jitter point coordinates.\n\n    Different from the global translation in ``GlobalRotScaleTrans``, here we\n        apply different noises to each point in a scene.\n\n    Args:\n        jitter_std (list[float]): The standard deviation of jittering noise.\n            This applies random noise to all points in a 3D scene, which is\n            sampled from a gaussian distribution whose standard deviation is\n            set by ``jitter_std``. Defaults to [0.01, 0.01, 0.01]\n        clip_range (list[float]): Clip the randomly generated jitter\n            noise into this range. 
If None is given, don't perform clipping.\n            Defaults to [-0.05, 0.05]\n\n    Note:\n        This transform should only be used in point cloud segmentation tasks\n            because we don't transform ground-truth bboxes accordingly.\n        For similar transform in detection task, please refer to `ObjectNoise`.\n    \"\"\"\n\n    def __init__(self,\n                 jitter_std=[0.01, 0.01, 0.01],\n                 clip_range=[-0.05, 0.05]):\n        seq_types = (list, tuple, np.ndarray)\n        if not isinstance(jitter_std, seq_types):\n            assert isinstance(jitter_std, (int, float)), \\\n                f'unsupported jitter_std type {type(jitter_std)}'\n            jitter_std = [jitter_std, jitter_std, jitter_std]\n        self.jitter_std = jitter_std\n\n        if clip_range is not None:\n            if not isinstance(clip_range, seq_types):\n                assert isinstance(clip_range, (int, float)), \\\n                    f'unsupported clip_range type {type(clip_range)}'\n                clip_range = [-clip_range, clip_range]\n        self.clip_range = clip_range\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to jitter all the points in the scene.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after adding noise to each point,\n                'points' key is updated in the result dict.\n        \"\"\"\n        points = input_dict['points']\n        jitter_std = np.array(self.jitter_std, dtype=np.float32)\n        jitter_noise = \\\n            np.random.randn(points.shape[0], 3) * jitter_std[None, :]\n        if self.clip_range is not None:\n            jitter_noise = np.clip(jitter_noise, self.clip_range[0],\n                                   self.clip_range[1])\n\n        points.translate(jitter_noise)\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(jitter_std={self.jitter_std},'\n        repr_str += f' clip_range={self.clip_range})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass ObjectSample(object):\n    \"\"\"Sample GT objects to the data.\n\n    Args:\n        db_sampler (dict): Config dict of the database sampler.\n        sample_2d (bool): Whether to also paste 2D image patch to the images\n            This should be true when applying multi-modality cut-and-paste.\n            Defaults to False.\n        use_ground_plane (bool): Whether to use gound plane to adjust the\n            3D labels.\n    \"\"\"\n\n    def __init__(self, db_sampler, sample_2d=False, use_ground_plane=False):\n        self.sampler_cfg = db_sampler\n        self.sample_2d = sample_2d\n        if 'type' not in db_sampler.keys():\n            db_sampler['type'] = 'DataBaseSampler'\n        self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)\n        self.use_ground_plane = use_ground_plane\n\n    @staticmethod\n    def remove_points_in_boxes(points, boxes):\n        \"\"\"Remove the points in the sampled bounding boxes.\n\n        Args:\n            points (:obj:`BasePoints`): Input point cloud array.\n            boxes (np.ndarray): Sampled ground truth boxes.\n\n        Returns:\n            np.ndarray: Points with those in the boxes removed.\n        \"\"\"\n        masks = box_np_ops.points_in_rbbox(points.coord.numpy(), boxes)\n        points = points[np.logical_not(masks.any(-1))]\n        return 
points\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to sample ground truth objects to the data.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after object sampling augmentation,\n                'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated\n                in the result dict.\n        \"\"\"\n\n\n\n        \n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        gt_labels_3d = input_dict['gt_labels_3d']\n\n        if self.use_ground_plane and 'plane' in input_dict['ann_info']:\n            ground_plane = input_dict['ann_info']['plane']\n            input_dict['plane'] = ground_plane\n        else:\n            ground_plane = None\n        # change to float for blending operation\n        points = input_dict['points']\n        if self.sample_2d:\n            img = input_dict['img']\n            gt_bboxes_2d = input_dict['gt_bboxes']\n            # Assume for now 3D & 2D bboxes are the same\n            sampled_dict = self.db_sampler.sample_all(\n                gt_bboxes_3d.tensor.numpy(),\n                gt_labels_3d,\n                gt_bboxes_2d=gt_bboxes_2d,\n                img=img)\n        else:\n            sampled_dict = self.db_sampler.sample_all(\n                gt_bboxes_3d.tensor.numpy(),\n                gt_labels_3d,\n                img=None,\n                ground_plane=ground_plane)\n\n        if sampled_dict is not None:\n            sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']\n            sampled_points = sampled_dict['points']\n            sampled_gt_labels = sampled_dict['gt_labels_3d']\n\n            gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],\n                                          axis=0)\n            gt_bboxes_3d = gt_bboxes_3d.new_box(\n                np.concatenate(\n                    [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))\n\n            points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)\n            # check the points dimension\n            points = points.cat([sampled_points, points])\n\n            if self.sample_2d:\n                sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']\n                gt_bboxes_2d = np.concatenate(\n                    [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)\n\n                input_dict['gt_bboxes'] = gt_bboxes_2d\n                input_dict['img'] = sampled_dict['img']\n\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d\n        input_dict['gt_labels_3d'] = gt_labels_3d.astype(np.int64)\n        input_dict['points'] = points\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f' sample_2d={self.sample_2d},'\n        repr_str += f' data_root={self.sampler_cfg.data_root},'\n        repr_str += f' info_path={self.sampler_cfg.info_path},'\n        repr_str += f' rate={self.sampler_cfg.rate},'\n        repr_str += f' prepare={self.sampler_cfg.prepare},'\n        repr_str += f' classes={self.sampler_cfg.classes},'\n        repr_str += f' sample_groups={self.sampler_cfg.sample_groups}'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass ObjectNoise(object):\n    \"\"\"Apply noise to each GT objects in the scene.\n\n    Args:\n        translation_std (list[float], optional): Standard deviation of the\n            distribution where translation noise are sampled from.\n        
    Defaults to [0.25, 0.25, 0.25].\n        global_rot_range (list[float], optional): Global rotation to the scene.\n            Defaults to [0.0, 0.0].\n        rot_range (list[float], optional): Object rotation range.\n            Defaults to [-0.15707963267, 0.15707963267].\n        num_try (int, optional): Number of times to try if the noise applied is\n            invalid. Defaults to 100.\n    \"\"\"\n\n    def __init__(self,\n                 translation_std=[0.25, 0.25, 0.25],\n                 global_rot_range=[0.0, 0.0],\n                 rot_range=[-0.15707963267, 0.15707963267],\n                 num_try=100):\n        self.translation_std = translation_std\n        self.global_rot_range = global_rot_range\n        self.rot_range = rot_range\n        self.num_try = num_try\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to apply noise to each ground truth in the scene.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after adding noise to each object,\n                'points', 'gt_bboxes_3d' keys are updated in the result dict.\n        \"\"\"\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        points = input_dict['points']\n\n        # TODO: this is inplace operation\n        numpy_box = gt_bboxes_3d.tensor.numpy()\n        numpy_points = points.tensor.numpy()\n\n        noise_per_object_v3_(\n            numpy_box,\n            numpy_points,\n            rotation_perturb=self.rot_range,\n            center_noise_std=self.translation_std,\n            global_random_rot_range=self.global_rot_range,\n            num_try=self.num_try)\n\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)\n        input_dict['points'] = points.new_point(numpy_points)\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(num_try={self.num_try},'\n        repr_str += f' translation_std={self.translation_std},'\n        repr_str += f' global_rot_range={self.global_rot_range},'\n        repr_str += f' rot_range={self.rot_range})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass GlobalAlignment(object):\n    \"\"\"Apply global alignment to 3D scene points by rotation and translation.\n\n    Args:\n        rotation_axis (int): Rotation axis for points and bboxes rotation.\n\n    Note:\n        We do not record the applied rotation and translation as in\n            GlobalRotScaleTrans. 
Because usually, we do not need to reverse\n            the alignment step.\n        For example, ScanNet 3D detection task uses aligned ground-truth\n            bounding boxes for evaluation.\n    \"\"\"\n\n    def __init__(self, rotation_axis):\n        self.rotation_axis = rotation_axis\n\n    def _trans_points(self, input_dict, trans_factor):\n        \"\"\"Private function to translate points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n            trans_factor (np.ndarray): Translation vector to be applied.\n\n        Returns:\n            dict: Results after translation, 'points' is updated in the dict.\n        \"\"\"\n        input_dict['points'].translate(trans_factor)\n\n    def _rot_points(self, input_dict, rot_mat):\n        \"\"\"Private function to rotate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n            rot_mat (np.ndarray): Rotation matrix to be applied.\n\n        Returns:\n            dict: Results after rotation, 'points' is updated in the dict.\n        \"\"\"\n        # input should be rot_mat_T so I transpose it here\n        input_dict['points'].rotate(rot_mat.T)\n\n    def _check_rot_mat(self, rot_mat):\n        \"\"\"Check if rotation matrix is valid for self.rotation_axis.\n\n        Args:\n            rot_mat (np.ndarray): Rotation matrix to be applied.\n        \"\"\"\n        is_valid = np.allclose(np.linalg.det(rot_mat), 1.0)\n        valid_array = np.zeros(3)\n        valid_array[self.rotation_axis] = 1.0\n        is_valid &= (rot_mat[self.rotation_axis, :] == valid_array).all()\n        is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all()\n        assert is_valid, f'invalid rotation matrix {rot_mat}'\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to shuffle points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after global alignment, 'points' and keys in\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        assert 'axis_align_matrix' in input_dict['ann_info'].keys(), \\\n            'axis_align_matrix is not provided in GlobalAlignment'\n\n        axis_align_matrix = input_dict['ann_info']['axis_align_matrix']\n        assert axis_align_matrix.shape == (4, 4), \\\n            f'invalid shape {axis_align_matrix.shape} for axis_align_matrix'\n        rot_mat = axis_align_matrix[:3, :3]\n        trans_vec = axis_align_matrix[:3, -1]\n\n        self._check_rot_mat(rot_mat)\n        self._rot_points(input_dict, rot_mat)\n        self._trans_points(input_dict, trans_vec)\n\n        return input_dict\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(rotation_axis={self.rotation_axis})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass GlobalRotScaleTrans(object):\n    \"\"\"Apply global rotation, scaling and translation to a 3D scene.\n\n    Args:\n        rot_range (list[float], optional): Range of rotation angle.\n            Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).\n        scale_ratio_range (list[float], optional): Range of scale ratio.\n            Defaults to [0.95, 1.05].\n        translation_std (list[float], optional): The standard deviation of\n            translation noise applied to a scene, which\n            is sampled from a gaussian distribution whose standard deviation\n            is 
set by ``translation_std``. Defaults to [0, 0, 0]\n        shift_height (bool, optional): Whether to shift height.\n            (the fourth dimension of indoor points) when scaling.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 rot_range=[-0.78539816, 0.78539816],\n                 scale_ratio_range=[0.95, 1.05],\n                 translation_std=[0, 0, 0],\n                 shift_height=False):\n        seq_types = (list, tuple, np.ndarray)\n        if not isinstance(rot_range, seq_types):\n            assert isinstance(rot_range, (int, float)), \\\n                f'unsupported rot_range type {type(rot_range)}'\n            rot_range = [-rot_range, rot_range]\n        self.rot_range = rot_range\n\n        assert isinstance(scale_ratio_range, seq_types), \\\n            f'unsupported scale_ratio_range type {type(scale_ratio_range)}'\n        self.scale_ratio_range = scale_ratio_range\n\n        if not isinstance(translation_std, seq_types):\n            assert isinstance(translation_std, (int, float)), \\\n                f'unsupported translation_std type {type(translation_std)}'\n            translation_std = [\n                translation_std, translation_std, translation_std\n            ]\n        assert all([std >= 0 for std in translation_std]), \\\n            'translation_std should be positive'\n        self.translation_std = translation_std\n        self.shift_height = shift_height\n\n    def _trans_bbox_points(self, input_dict):\n        \"\"\"Private function to translate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after translation, 'points', 'pcd_trans'\n                and keys in input_dict['bbox3d_fields'] are updated\n                in the result dict.\n        \"\"\"\n        translation_std = np.array(self.translation_std, dtype=np.float32)\n        trans_factor = np.random.normal(scale=translation_std, size=3).T\n\n        input_dict['points'].translate(trans_factor)\n        input_dict['pcd_trans'] = trans_factor\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].translate(trans_factor)\n\n    def _rot_bbox_points(self, input_dict):\n        \"\"\"Private function to rotate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after rotation, 'points', 'pcd_rotation'\n                and keys in input_dict['bbox3d_fields'] are updated\n                in the result dict.\n        \"\"\"\n        rotation = self.rot_range\n        noise_rotation = np.random.uniform(rotation[0], rotation[1])\n\n        # if no bbox in input_dict, only rotate points\n        if len(input_dict['bbox3d_fields']) == 0:\n            rot_mat_T = input_dict['points'].rotate(noise_rotation)\n            input_dict['pcd_rotation'] = rot_mat_T\n            input_dict['pcd_rotation_angle'] = noise_rotation\n            return\n\n        # rotate points with bboxes\n        for key in input_dict['bbox3d_fields']:\n            if len(input_dict[key].tensor) != 0:\n                points, rot_mat_T = input_dict[key].rotate(\n                    noise_rotation, input_dict['points'])\n                input_dict['points'] = points\n                input_dict['pcd_rotation'] = rot_mat_T\n                input_dict['pcd_rotation_angle'] = noise_rotation\n\n    def _scale_bbox_points(self, input_dict):\n        
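# the scale factor applied below is input_dict['pcd_scale_factor'], normally\n        # drawn by _random_scale as a uniform sample from scale_ratio_range\n        # (default [0.95, 1.05]) before this method runs\n        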
\"\"\"Private function to scale bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points'and keys in\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        scale = input_dict['pcd_scale_factor']\n        points = input_dict['points']\n        points.scale(scale)\n        if self.shift_height:\n            assert 'height' in points.attribute_dims.keys(), \\\n                'setting shift_height=True but points have no height attribute'\n            points.tensor[:, points.attribute_dims['height']] *= scale\n        input_dict['points'] = points\n\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].scale(scale)\n\n    def _random_scale(self, input_dict):\n        \"\"\"Private function to randomly set the scale factor.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'pcd_scale_factor' are updated\n                in the result dict.\n        \"\"\"\n        scale_factor = np.random.uniform(self.scale_ratio_range[0],\n                                         self.scale_ratio_range[1])\n        input_dict['pcd_scale_factor'] = scale_factor\n\n    def __call__(self, input_dict):\n        \"\"\"Private function to rotate, scale and translate bounding boxes and\n        points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points', 'pcd_rotation',\n                'pcd_scale_factor', 'pcd_trans' and keys in\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        self._rot_bbox_points(input_dict)\n\n        if 'pcd_scale_factor' not in input_dict:\n            self._random_scale(input_dict)\n        self._scale_bbox_points(input_dict)\n\n        self._trans_bbox_points(input_dict)\n\n        input_dict['transformation_3d_flow'].extend(['R', 'S', 'T'])\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(rot_range={self.rot_range},'\n        repr_str += f' scale_ratio_range={self.scale_ratio_range},'\n        repr_str += f' translation_std={self.translation_std},'\n        repr_str += f' shift_height={self.shift_height})'\n        return repr_str\n\n\n\n\n@PIPELINES.register_module()\nclass RotScaleTransPoints(object):\n    \"\"\"Apply global rotation, scaling and translation to a 3D scene.\n\n    Args:\n        rot_range (list[float], optional): Range of rotation angle.\n            Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).\n        scale_ratio_range (list[float], optional): Range of scale ratio.\n            Defaults to [0.95, 1.05].\n        translation_std (list[float], optional): The standard deviation of\n            translation noise applied to a scene, which\n            is sampled from a gaussian distribution whose standard deviation\n            is set by ``translation_std``. 
Defaults to [0, 0, 0]\n        shift_height (bool, optional): Whether to shift height.\n            (the fourth dimension of indoor points) when scaling.\n            Defaults to False.\n    \"\"\"
\n\n    def __init__(self,\n                 rot_range=[-0.78539816, 0.78539816],\n                 scale_ratio_range=[0.95, 1.05],\n                 translation_std=[0, 0, 0],\n                 shift_height=False):\n        # argument handling mirrors GlobalRotScaleTrans so that the documented\n        # defaults, the helper methods below and __repr__ all work\n        seq_types = (list, tuple, np.ndarray)\n        if not isinstance(rot_range, seq_types):\n            assert isinstance(rot_range, (int, float)), \\\n                f'unsupported rot_range type {type(rot_range)}'\n            rot_range = [-rot_range, rot_range]\n        self.rot_range = rot_range\n\n        assert isinstance(scale_ratio_range, seq_types), \\\n            f'unsupported scale_ratio_range type {type(scale_ratio_range)}'\n        self.scale_ratio_range = scale_ratio_range\n\n        if not isinstance(translation_std, seq_types):\n            assert isinstance(translation_std, (int, float)), \\\n                f'unsupported translation_std type {type(translation_std)}'\n            translation_std = [\n                translation_std, translation_std, translation_std\n            ]\n        assert all([std >= 0 for std in translation_std]), \\\n            'translation_std should be positive'\n        self.translation_std = translation_std\n        self.shift_height = shift_height
\n\n    def _trans_bbox_points(self, input_dict):\n        \"\"\"Private function to translate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after translation, 'points', 'pcd_trans'\n                and keys in input_dict['bbox3d_fields'] are updated\n                in the result dict.\n        \"\"\"\n        translation_std = np.array(self.translation_std, dtype=np.float32)\n        trans_factor = np.random.normal(scale=translation_std, size=3).T\n\n        input_dict['points'].translate(trans_factor)\n        input_dict['pcd_trans'] = trans_factor\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].translate(trans_factor)
\n\n    def _rot_bbox_points(self, input_dict):\n        \"\"\"Private function to rotate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after rotation, 'points', 'pcd_rotation'\n                and keys in input_dict['bbox3d_fields'] are updated\n                in the result dict.\n        \"\"\"\n        rotation = self.rot_range\n        noise_rotation = np.random.uniform(rotation[0], rotation[1])\n\n        # if no bbox in input_dict, only rotate points\n        if len(input_dict['bbox3d_fields']) == 0:\n            rot_mat_T = input_dict['points'].rotate(noise_rotation)\n            input_dict['pcd_rotation'] = rot_mat_T\n            input_dict['pcd_rotation_angle'] = noise_rotation\n            return\n\n        # rotate points with bboxes\n        for key in input_dict['bbox3d_fields']:\n            if len(input_dict[key].tensor) != 0:\n                points, rot_mat_T = input_dict[key].rotate(\n                    noise_rotation, input_dict['points'])\n                input_dict['points'] = points\n                input_dict['pcd_rotation'] = rot_mat_T\n                input_dict['pcd_rotation_angle'] = noise_rotation
\n\n    def _scale_bbox_points(self, input_dict):\n        \"\"\"Private function to scale bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points' and keys in\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        scale = input_dict['pcd_scale_factor']\n        points = input_dict['points']\n        points.scale(scale)\n        if self.shift_height:\n            assert 'height' in points.attribute_dims.keys(), \\\n                'setting shift_height=True but points have no height attribute'\n            points.tensor[:, points.attribute_dims['height']] *= scale\n        input_dict['points'] = points\n\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].scale(scale)
\n\n    def _random_scale(self, input_dict):\n        \"\"\"Private function to randomly set the scale factor.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'pcd_scale_factor' are updated\n                in the result dict.\n        \"\"\"\n        scale_factor = np.random.uniform(self.scale_ratio_range[0],
\n                                         self.scale_ratio_range[1])\n        input_dict['pcd_scale_factor'] = scale_factor\n\n    def __call__(self, input_dict):\n        \"\"\"Private function to rotate and translate bounding boxes and\n        points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after transformation, 'points', 'pcd_rotation',\n                'pcd_trans' and keys in input_dict['bbox3d_fields'] are\n                updated in the result dict.\n        \"\"\"\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        self._rot_bbox_points(input_dict)\n\n        self._trans_bbox_points(input_dict)\n\n        # only rotation and translation are applied by this transform\n        input_dict['transformation_3d_flow'].extend(['R', 'T'])\n        return input_dict
\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(rot_range={self.rot_range},'\n        repr_str += f' scale_ratio_range={self.scale_ratio_range},'\n        repr_str += f' translation_std={self.translation_std},'\n        repr_str += f' shift_height={self.shift_height})'\n        return repr_str
\n\n\n@PIPELINES.register_module()\nclass PointShuffle(object):\n    \"\"\"Shuffle input points.\"\"\"\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to shuffle points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after shuffling, 'points', 'pts_instance_mask'\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        idx = input_dict['points'].shuffle()\n        idx = idx.numpy()\n\n        pts_instance_mask = input_dict.get('pts_instance_mask', None)\n        pts_semantic_mask = input_dict.get('pts_semantic_mask', None)\n\n        if pts_instance_mask is not None:\n            input_dict['pts_instance_mask'] = pts_instance_mask[idx]\n\n        if pts_semantic_mask is not None:\n            input_dict['pts_semantic_mask'] = pts_semantic_mask[idx]\n\n        return input_dict\n\n    def __repr__(self):\n        return self.__class__.__name__
\n\n\n@PIPELINES.register_module()\nclass ObjectRangeFilter(object):\n    \"\"\"Filter objects by the range.\n\n    Args:\n        point_cloud_range (list[float]): Point cloud range.\n    \"\"\"\n\n    def __init__(self, point_cloud_range):\n        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter objects by the range.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'\n                keys are updated in the result dict.\n        \"\"\"\n        # Check points instance type and initialise bev_range\n        if isinstance(input_dict['gt_bboxes_3d'],\n                      (LiDARInstance3DBoxes, DepthInstance3DBoxes)):\n            bev_range = self.pcd_range[[0, 1, 3, 4]]\n        elif isinstance(input_dict['gt_bboxes_3d'], CameraInstance3DBoxes):\n            bev_range = self.pcd_range[[0, 2, 3, 5]]\n\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        gt_labels_3d = input_dict['gt_labels_3d']\n        mask = gt_bboxes_3d.in_range_bev(bev_range)\n        gt_bboxes_3d = gt_bboxes_3d[mask]\n        # mask is a torch tensor but gt_labels_3d is still numpy array\n        # using mask to index gt_labels_3d will cause bug when
\n        # len(gt_labels_3d) == 1, where mask=1 will be interpreted\n        # as gt_labels_3d[1] and cause out of index error\n        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool_)]\n\n        if 'instance_inds' in input_dict.keys():\n            input_dict['instance_inds'] = input_dict['instance_inds'][mask.numpy().astype(np.bool_)]\n\n        if 'gt_agent_fut_traj' in input_dict.keys():\n            input_dict['gt_agent_fut_traj'] = input_dict['gt_agent_fut_traj'][mask.numpy().astype(np.bool_)]\n            input_dict['gt_agent_fut_traj_mask'] = input_dict['gt_agent_fut_traj_mask'][mask.numpy().astype(np.bool_)]\n        # limit rad to [-pi, pi]\n        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d\n        input_dict['gt_labels_3d'] = gt_labels_3d\n\n        return input_dict
\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(point_cloud_range={self.pcd_range.tolist()})'\n        return repr_str
\n\n\n@PIPELINES.register_module()\nclass PointsRangeFilter(object):\n    \"\"\"Filter points by the range.\n\n    Args:\n        point_cloud_range (list[float]): Point cloud range.\n    \"\"\"\n\n    def __init__(self, point_cloud_range):\n        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter points by the range.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'points', 'pts_instance_mask'\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = input_dict['points']\n        points_mask = points.in_range_3d(self.pcd_range)\n        clean_points = points[points_mask]\n        input_dict['points'] = clean_points\n        points_mask = points_mask.numpy()\n\n        pts_instance_mask = input_dict.get('pts_instance_mask', None)\n        pts_semantic_mask = input_dict.get('pts_semantic_mask', None)\n\n        if pts_instance_mask is not None:\n            input_dict['pts_instance_mask'] = pts_instance_mask[points_mask]\n\n        if pts_semantic_mask is not None:\n            input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask]\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(point_cloud_range={self.pcd_range.tolist()})'\n        return repr_str
\n\n\n@PIPELINES.register_module()\nclass ObjectNameFilter(object):\n    \"\"\"Filter GT objects by their names.\n\n    Args:\n        classes (list[str]): List of class names to be kept for training.\n    \"\"\"\n\n    def __init__(self, classes):\n        self.classes = classes\n        self.labels = list(range(len(self.classes)))\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter objects by their names.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'\n                keys are updated in the result dict.\n        \"\"\"\n        gt_labels_3d = input_dict['gt_labels_3d']\n        gt_bboxes_mask = np.array([n in self.labels for n in gt_labels_3d],\n                                  dtype=np.bool_)\n        
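# keep only boxes whose label index is among the configured classes; the\n        # per-instance fields below (instance_inds, agent future trajectories)\n        # are filtered with the same mask so they stay aligned with gt_bboxes_3d\n        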
input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][gt_bboxes_mask]\n        input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][gt_bboxes_mask]\n        if 'instance_inds' in input_dict.keys():\n            input_dict['instance_inds'] = input_dict['instance_inds'][gt_bboxes_mask]\n\n        if 'gt_agent_fut_traj' in input_dict.keys():\n            input_dict['gt_agent_fut_traj'] = input_dict['gt_agent_fut_traj'][gt_bboxes_mask]\n            input_dict['gt_agent_fut_traj_mask'] = input_dict['gt_agent_fut_traj_mask'][gt_bboxes_mask]\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(classes={self.classes})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass PointSample(object):\n    \"\"\"Point sample.\n\n    Sampling data to a certain number.\n\n    Args:\n        num_points (int): Number of points to be sampled.\n        sample_range (float, optional): The range where to sample points.\n            If not None, the points with depth larger than `sample_range` are\n            prior to be sampled. Defaults to None.\n        replace (bool, optional): Whether the sampling is with or without\n            replacement. Defaults to False.\n    \"\"\"\n\n    def __init__(self, num_points, sample_range=None, replace=False):\n        self.num_points = num_points\n        self.sample_range = sample_range\n        self.replace = replace\n\n    def _points_random_sampling(self,\n                                points,\n                                num_samples,\n                                sample_range=None,\n                                replace=False,\n                                return_choices=False):\n        \"\"\"Points random sampling.\n\n        Sample points to a certain number.\n\n        Args:\n            points (np.ndarray | :obj:`BasePoints`): 3D Points.\n            num_samples (int): Number of samples to be sampled.\n            sample_range (float, optional): Indicating the range where the\n                points will be sampled. 
Defaults to None.\n            replace (bool, optional): Sampling with or without replacement.\n                Defaults to None.\n            return_choices (bool, optional): Whether return choice.\n                Defaults to False.\n        Returns:\n            tuple[np.ndarray] | np.ndarray:\n                - points (np.ndarray | :obj:`BasePoints`): 3D Points.\n                - choices (np.ndarray, optional): The generated random samples.\n        \"\"\"\n        if not replace:\n            replace = (points.shape[0] < num_samples)\n        point_range = range(len(points))\n        if sample_range is not None and not replace:\n            # Only sampling the near points when len(points) >= num_samples\n            dist = np.linalg.norm(points.tensor, axis=1)\n            far_inds = np.where(dist >= sample_range)[0]\n            near_inds = np.where(dist < sample_range)[0]\n            # in case there are too many far points\n            if len(far_inds) > num_samples:\n                far_inds = np.random.choice(\n                    far_inds, num_samples, replace=False)\n            point_range = near_inds\n            num_samples -= len(far_inds)\n        choices = np.random.choice(point_range, num_samples, replace=replace)\n        if sample_range is not None and not replace:\n            choices = np.concatenate((far_inds, choices))\n            # Shuffle points after sampling\n            np.random.shuffle(choices)\n        if return_choices:\n            return points[choices], choices\n        else:\n            return points[choices]\n\n    def __call__(self, results):\n        \"\"\"Call function to sample points to in indoor scenes.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n        Returns:\n            dict: Results after sampling, 'points', 'pts_instance_mask'\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = results['points']\n        points, choices = self._points_random_sampling(\n            points,\n            self.num_points,\n            self.sample_range,\n            self.replace,\n            return_choices=True)\n        results['points'] = points\n\n        pts_instance_mask = results.get('pts_instance_mask', None)\n        pts_semantic_mask = results.get('pts_semantic_mask', None)\n\n        if pts_instance_mask is not None:\n            pts_instance_mask = pts_instance_mask[choices]\n            results['pts_instance_mask'] = pts_instance_mask\n\n        if pts_semantic_mask is not None:\n            pts_semantic_mask = pts_semantic_mask[choices]\n            results['pts_semantic_mask'] = pts_semantic_mask\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(num_points={self.num_points},'\n        repr_str += f' sample_range={self.sample_range},'\n        repr_str += f' replace={self.replace})'\n\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass IndoorPointSample(PointSample):\n    \"\"\"Indoor point sample.\n\n    Sampling data to a certain number.\n    NOTE: IndoorPointSample is deprecated in favor of PointSample\n\n    Args:\n        num_points (int): Number of points to be sampled.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        warnings.warn(\n            'IndoorPointSample is deprecated in favor of PointSample')\n        super(IndoorPointSample, self).__init__(*args, 
**kwargs)\n\n\n@PIPELINES.register_module()\nclass IndoorPatchPointSample(object):\n    r\"\"\"Indoor point sample within a patch. Modified from `PointNet++ <https://\n    github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py>`_.\n\n    Sampling data to a certain number for semantic segmentation.\n\n    Args:\n        num_points (int): Number of points to be sampled.\n        block_size (float, optional): Size of a block to sample points from.\n            Defaults to 1.5.\n        sample_rate (float, optional): Stride used in sliding patch generation.\n            This parameter is unused in `IndoorPatchPointSample` and thus has\n            been deprecated. We plan to remove it in the future.\n            Defaults to None.\n        ignore_index (int, optional): Label index that won't be used for the\n            segmentation task. This is set in PointSegClassMapping as neg_cls.\n            If not None, will be used as a patch selection criterion.\n            Defaults to None.\n        use_normalized_coord (bool, optional): Whether to use normalized xyz as\n            additional features. Defaults to False.\n        num_try (int, optional): Number of times to try if the patch selected\n            is invalid. Defaults to 10.\n        enlarge_size (float, optional): Enlarge the sampled patch to\n            [-block_size / 2 - enlarge_size, block_size / 2 + enlarge_size] as\n            an augmentation. If None, set it as 0. Defaults to 0.2.\n        min_unique_num (int, optional): Minimum number of unique points\n            the sampled patch should contain. If None, use PointNet++'s method\n            to judge uniqueness. Defaults to None.\n        eps (float, optional): A value added to patch boundary to guarantee\n            points coverage. Defaults to 1e-2.\n\n    Note:\n        This transform should only be used in the training process of point\n            cloud segmentation tasks. For the sliding patch generation and\n            inference process in testing, please refer to the `slide_inference`\n            function of `EncoderDecoder3D` class.\n    \"\"\"\n\n    def __init__(self,\n                 num_points,\n                 block_size=1.5,\n                 sample_rate=None,\n                 ignore_index=None,\n                 use_normalized_coord=False,\n                 num_try=10,\n                 enlarge_size=0.2,\n                 min_unique_num=None,\n                 eps=1e-2):\n        self.num_points = num_points\n        self.block_size = block_size\n        self.ignore_index = ignore_index\n        self.use_normalized_coord = use_normalized_coord\n        self.num_try = num_try\n        self.enlarge_size = enlarge_size if enlarge_size is not None else 0.0\n        self.min_unique_num = min_unique_num\n        self.eps = eps\n\n        if sample_rate is not None:\n            warnings.warn(\n                \"'sample_rate' has been deprecated and will be removed in \"\n                'the future. Please remove them from your code.')\n\n    def _input_generation(self, coords, patch_center, coord_max, attributes,\n                          attribute_dims, point_type):\n        \"\"\"Generating model input.\n\n        Generate input by subtracting patch center and adding additional\n            features. 
Currently support colors and normalized xyz as features.\n\n        Args:\n            coords (np.ndarray): Sampled 3D Points.\n            patch_center (np.ndarray): Center coordinate of the selected patch.\n            coord_max (np.ndarray): Max coordinate of all 3D Points.\n            attributes (np.ndarray): features of input points.\n            attribute_dims (dict): Dictionary to indicate the meaning of extra\n                dimension.\n            point_type (type): class of input points inherited from BasePoints.\n\n        Returns:\n            :obj:`BasePoints`: The generated input data.\n        \"\"\"\n        # subtract patch center, the z dimension is not centered\n        centered_coords = coords.copy()\n        centered_coords[:, 0] -= patch_center[0]\n        centered_coords[:, 1] -= patch_center[1]\n\n        if self.use_normalized_coord:\n            normalized_coord = coords / coord_max\n            attributes = np.concatenate([attributes, normalized_coord], axis=1)\n            if attribute_dims is None:\n                attribute_dims = dict()\n            attribute_dims.update(\n                dict(normalized_coord=[\n                    attributes.shape[1], attributes.shape[1] +\n                    1, attributes.shape[1] + 2\n                ]))\n\n        points = np.concatenate([centered_coords, attributes], axis=1)\n        points = point_type(\n            points, points_dim=points.shape[1], attribute_dims=attribute_dims)\n\n        return points\n\n    def _patch_points_sampling(self, points, sem_mask):\n        \"\"\"Patch points sampling.\n\n        First sample a valid patch.\n        Then sample points within that patch to a certain number.\n\n        Args:\n            points (:obj:`BasePoints`): 3D Points.\n            sem_mask (np.ndarray): semantic segmentation mask for input points.\n\n        Returns:\n            tuple[:obj:`BasePoints`, np.ndarray] | :obj:`BasePoints`:\n\n                - points (:obj:`BasePoints`): 3D Points.\n                - choices (np.ndarray): The generated random samples.\n        \"\"\"\n        coords = points.coord.numpy()\n        attributes = points.tensor[:, 3:].numpy()\n        attribute_dims = points.attribute_dims\n        point_type = type(points)\n\n        coord_max = np.amax(coords, axis=0)\n        coord_min = np.amin(coords, axis=0)\n\n        for _ in range(self.num_try):\n            # random sample a point as patch center\n            cur_center = coords[np.random.choice(coords.shape[0])]\n\n            # boundary of a patch, which would be enlarged by\n            # `self.enlarge_size` as an augmentation\n            cur_max = cur_center + np.array(\n                [self.block_size / 2.0, self.block_size / 2.0, 0.0])\n            cur_min = cur_center - np.array(\n                [self.block_size / 2.0, self.block_size / 2.0, 0.0])\n            cur_max[2] = coord_max[2]\n            cur_min[2] = coord_min[2]\n            cur_choice = np.sum(\n                (coords >= (cur_min - self.enlarge_size)) *\n                (coords <= (cur_max + self.enlarge_size)),\n                axis=1) == 3\n\n            if not cur_choice.any():  # no points in this patch\n                continue\n\n            cur_coords = coords[cur_choice, :]\n            cur_sem_mask = sem_mask[cur_choice]\n            point_idxs = np.where(cur_choice)[0]\n            mask = np.sum(\n                (cur_coords >= (cur_min - self.eps)) * (cur_coords <=\n                                                        (cur_max + 
self.eps)),\n                axis=1) == 3\n\n            # two criteria for patch sampling, adopted from PointNet++\n            # 1. selected patch should contain enough unique points\n            if self.min_unique_num is None:\n                # use PointNet++'s method as default\n                # [31, 31, 62] are just some big values used to transform\n                # coords from 3d array to 1d and then check their uniqueness\n                # this is used in all the ScanNet code following PointNet++\n                vidx = np.ceil(\n                    (cur_coords[mask, :] - cur_min) / (cur_max - cur_min) *\n                    np.array([31.0, 31.0, 62.0]))\n                vidx = np.unique(vidx[:, 0] * 31.0 * 62.0 + vidx[:, 1] * 62.0 +\n                                 vidx[:, 2])\n                flag1 = len(vidx) / 31.0 / 31.0 / 62.0 >= 0.02\n            else:\n                # if `min_unique_num` is provided, directly compare with it\n                flag1 = mask.sum() >= self.min_unique_num\n\n            # 2. selected patch should contain enough annotated points\n            if self.ignore_index is None:\n                flag2 = True\n            else:\n                flag2 = np.sum(cur_sem_mask != self.ignore_index) / \\\n                               len(cur_sem_mask) >= 0.7\n\n            if flag1 and flag2:\n                break\n\n        # sample idx to `self.num_points`\n        if point_idxs.size >= self.num_points:\n            # no duplicate in sub-sampling\n            choices = np.random.choice(\n                point_idxs, self.num_points, replace=False)\n        else:\n            # do not use random choice here to avoid some points not counted\n            dup = np.random.choice(point_idxs.size,\n                                   self.num_points - point_idxs.size)\n            idx_dup = np.concatenate(\n                [np.arange(point_idxs.size),\n                 np.array(dup)], 0)\n            choices = point_idxs[idx_dup]\n\n        # construct model input\n        points = self._input_generation(coords[choices], cur_center, coord_max,\n                                        attributes[choices], attribute_dims,\n                                        point_type)\n\n        return points, choices\n\n    def __call__(self, results):\n        \"\"\"Call function to sample points to in indoor scenes.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after sampling, 'points', 'pts_instance_mask'\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = results['points']\n\n        assert 'pts_semantic_mask' in results.keys(), \\\n            'semantic mask should be provided in training and evaluation'\n        pts_semantic_mask = results['pts_semantic_mask']\n\n        points, choices = self._patch_points_sampling(points,\n                                                      pts_semantic_mask)\n\n        results['points'] = points\n        results['pts_semantic_mask'] = pts_semantic_mask[choices]\n        pts_instance_mask = results.get('pts_instance_mask', None)\n        if pts_instance_mask is not None:\n            results['pts_instance_mask'] = pts_instance_mask[choices]\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(num_points={self.num_points},'\n        repr_str += f' 
block_size={self.block_size},'\n        repr_str += f' ignore_index={self.ignore_index},'\n        repr_str += f' use_normalized_coord={self.use_normalized_coord},'\n        repr_str += f' num_try={self.num_try},'\n        repr_str += f' enlarge_size={self.enlarge_size},'\n        repr_str += f' min_unique_num={self.min_unique_num},'\n        repr_str += f' eps={self.eps})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass BackgroundPointsFilter(object):\n    \"\"\"Filter background points near the bounding box.\n\n    Args:\n        bbox_enlarge_range (tuple[float], float): Bbox enlarge range.\n    \"\"\"\n\n    def __init__(self, bbox_enlarge_range):\n        assert (is_tuple_of(bbox_enlarge_range, float)\n                and len(bbox_enlarge_range) == 3) \\\n            or isinstance(bbox_enlarge_range, float), \\\n            f'Invalid arguments bbox_enlarge_range {bbox_enlarge_range}'\n\n        if isinstance(bbox_enlarge_range, float):\n            bbox_enlarge_range = [bbox_enlarge_range] * 3\n        self.bbox_enlarge_range = np.array(\n            bbox_enlarge_range, dtype=np.float32)[np.newaxis, :]\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter points by the range.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'points', 'pts_instance_mask'\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = input_dict['points']\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n\n        # avoid groundtruth being modified\n        gt_bboxes_3d_np = gt_bboxes_3d.tensor.clone().numpy()\n        gt_bboxes_3d_np[:, :3] = gt_bboxes_3d.gravity_center.clone().numpy()\n\n        enlarged_gt_bboxes_3d = gt_bboxes_3d_np.copy()\n        enlarged_gt_bboxes_3d[:, 3:6] += self.bbox_enlarge_range\n        points_numpy = points.tensor.clone().numpy()\n        foreground_masks = box_np_ops.points_in_rbbox(\n            points_numpy, gt_bboxes_3d_np, origin=(0.5, 0.5, 0.5))\n        enlarge_foreground_masks = box_np_ops.points_in_rbbox(\n            points_numpy, enlarged_gt_bboxes_3d, origin=(0.5, 0.5, 0.5))\n        foreground_masks = foreground_masks.max(1)\n        enlarge_foreground_masks = enlarge_foreground_masks.max(1)\n        valid_masks = ~np.logical_and(~foreground_masks,\n                                      enlarge_foreground_masks)\n\n        input_dict['points'] = points[valid_masks]\n        pts_instance_mask = input_dict.get('pts_instance_mask', None)\n        if pts_instance_mask is not None:\n            input_dict['pts_instance_mask'] = pts_instance_mask[valid_masks]\n\n        pts_semantic_mask = input_dict.get('pts_semantic_mask', None)\n        if pts_semantic_mask is not None:\n            input_dict['pts_semantic_mask'] = pts_semantic_mask[valid_masks]\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(bbox_enlarge_range={self.bbox_enlarge_range.tolist()})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass VoxelBasedPointSampler(object):\n    \"\"\"Voxel based point sampler.\n\n    Apply voxel sampling to multiple sweep points.\n\n    Args:\n        cur_sweep_cfg (dict): Config for sampling current points.\n        prev_sweep_cfg (dict): Config for sampling previous points.\n        time_dim (int): Index that indicate the time 
dimension\n            for input points.\n    \"\"\"\n\n    def __init__(self, cur_sweep_cfg, prev_sweep_cfg=None, time_dim=3):\n        self.cur_voxel_generator = VoxelGenerator(**cur_sweep_cfg)\n        self.cur_voxel_num = self.cur_voxel_generator._max_voxels\n        self.time_dim = time_dim\n        if prev_sweep_cfg is not None:\n            assert prev_sweep_cfg['max_num_points'] == \\\n                cur_sweep_cfg['max_num_points']\n            self.prev_voxel_generator = VoxelGenerator(**prev_sweep_cfg)\n            self.prev_voxel_num = self.prev_voxel_generator._max_voxels\n        else:\n            self.prev_voxel_generator = None\n            self.prev_voxel_num = 0\n\n    def _sample_points(self, points, sampler, point_dim):\n        \"\"\"Sample points for each points subset.\n\n        Args:\n            points (np.ndarray): Points subset to be sampled.\n            sampler (VoxelGenerator): Voxel based sampler for\n                each points subset.\n            point_dim (int): The dimension of each points\n\n        Returns:\n            np.ndarray: Sampled points.\n        \"\"\"\n        voxels, coors, num_points_per_voxel = sampler.generate(points)\n        if voxels.shape[0] < sampler._max_voxels:\n            padding_points = np.zeros([\n                sampler._max_voxels - voxels.shape[0], sampler._max_num_points,\n                point_dim\n            ],\n                                      dtype=points.dtype)\n            padding_points[:] = voxels[0]\n            sample_points = np.concatenate([voxels, padding_points], axis=0)\n        else:\n            sample_points = voxels\n\n        return sample_points\n\n    def __call__(self, results):\n        \"\"\"Call function to sample points from multiple sweeps.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after sampling, 'points', 'pts_instance_mask'\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = results['points']\n        original_dim = points.shape[1]\n\n        # TODO: process instance and semantic mask while _max_num_points\n        # is larger than 1\n        # Extend points with seg and mask fields\n        map_fields2dim = []\n        start_dim = original_dim\n        points_numpy = points.tensor.numpy()\n        extra_channel = [points_numpy]\n        for idx, key in enumerate(results['pts_mask_fields']):\n            map_fields2dim.append((key, idx + start_dim))\n            extra_channel.append(results[key][..., None])\n\n        start_dim += len(results['pts_mask_fields'])\n        for idx, key in enumerate(results['pts_seg_fields']):\n            map_fields2dim.append((key, idx + start_dim))\n            extra_channel.append(results[key][..., None])\n\n        points_numpy = np.concatenate(extra_channel, axis=-1)\n\n        # Split points into two part, current sweep points and\n        # previous sweeps points.\n        # TODO: support different sampling methods for next sweeps points\n        # and previous sweeps points.\n        cur_points_flag = (points_numpy[:, self.time_dim] == 0)\n        cur_sweep_points = points_numpy[cur_points_flag]\n        prev_sweeps_points = points_numpy[~cur_points_flag]\n        if prev_sweeps_points.shape[0] == 0:\n            prev_sweeps_points = cur_sweep_points\n\n        # Shuffle points before sampling\n        np.random.shuffle(cur_sweep_points)\n        np.random.shuffle(prev_sweeps_points)\n\n        
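# the current-sweep and previous-sweep splits are voxel-sampled separately\n        # below; shuffling above randomizes which points are kept in each voxel\n        # instead of depending on the original load order\n        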
cur_sweep_points = self._sample_points(cur_sweep_points,\n                                               self.cur_voxel_generator,\n                                               points_numpy.shape[1])\n        if self.prev_voxel_generator is not None:\n            prev_sweeps_points = self._sample_points(prev_sweeps_points,\n                                                     self.prev_voxel_generator,\n                                                     points_numpy.shape[1])\n\n            points_numpy = np.concatenate(\n                [cur_sweep_points, prev_sweeps_points], 0)\n        else:\n            points_numpy = cur_sweep_points\n\n        if self.cur_voxel_generator._max_num_points == 1:\n            points_numpy = points_numpy.squeeze(1)\n        results['points'] = points.new_point(points_numpy[..., :original_dim])\n\n        # Restore the corresponding seg and mask fields\n        for key, dim_index in map_fields2dim:\n            results[key] = points_numpy[..., dim_index]\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n\n        def _auto_indent(repr_str, indent):\n            repr_str = repr_str.split('\\n')\n            repr_str = [' ' * indent + t + '\\n' for t in repr_str]\n            repr_str = ''.join(repr_str)[:-1]\n            return repr_str\n\n        repr_str = self.__class__.__name__\n        indent = 4\n        repr_str += '(\\n'\n        repr_str += ' ' * indent + f'num_cur_sweep={self.cur_voxel_num},\\n'\n        repr_str += ' ' * indent + f'num_prev_sweep={self.prev_voxel_num},\\n'\n        repr_str += ' ' * indent + f'time_dim={self.time_dim},\\n'\n        repr_str += ' ' * indent + 'cur_voxel_generator=\\n'\n        repr_str += f'{_auto_indent(repr(self.cur_voxel_generator), 8)},\\n'\n        repr_str += ' ' * indent + 'prev_voxel_generator=\\n'\n        repr_str += f'{_auto_indent(repr(self.prev_voxel_generator), 8)})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass AffineResize(object):\n    \"\"\"Get the affine transform matrices to the target size.\n\n    Different from :class:`RandomAffine` in MMDetection, this class can\n    calculate the affine transform matrices while resizing the input image\n    to a fixed size. The affine transform matrices include: 1) matrix\n    transforming original image to the network input image size. 2) matrix\n    transforming original image to the network output feature map size.\n\n    Args:\n        img_scale (tuple): Images scales for resizing.\n        down_ratio (int): The down ratio of feature map.\n            Actually the arg should be >= 1.\n        bbox_clip_border (bool, optional): Whether clip the objects\n            outside the border of the image. 
Defaults to True.\n    \"\"\"\n\n    def __init__(self, img_scale, down_ratio, bbox_clip_border=True):\n\n        self.img_scale = img_scale\n        self.down_ratio = down_ratio\n        self.bbox_clip_border = bbox_clip_border\n\n    def __call__(self, results):\n        \"\"\"Call function to do affine transform to input image and labels.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after affine resize, 'affine_aug', 'trans_mat'\n                keys are added in the result dict.\n        \"\"\"\n        # The results have gone through RandomShiftScale before AffineResize\n        if 'center' not in results:\n            img = results['img']\n            height, width = img.shape[:2]\n            center = np.array([width / 2, height / 2], dtype=np.float32)\n            size = np.array([width, height], dtype=np.float32)\n            results['affine_aug'] = False\n        else:\n            # The results did not go through RandomShiftScale before\n            # AffineResize\n            img = results['img']\n            center = results['center']\n            size = results['size']\n\n        trans_affine = self._get_transform_matrix(center, size, self.img_scale)\n\n        img = cv2.warpAffine(img, trans_affine[:2, :], self.img_scale)\n\n        if isinstance(self.down_ratio, tuple):\n            trans_mat = [\n                self._get_transform_matrix(\n                    center, size,\n                    (self.img_scale[0] // ratio, self.img_scale[1] // ratio))\n                for ratio in self.down_ratio\n            ]  # (3, 3)\n        else:\n            trans_mat = self._get_transform_matrix(\n                center, size, (self.img_scale[0] // self.down_ratio,\n                               self.img_scale[1] // self.down_ratio))\n\n        results['img'] = img\n        results['img_shape'] = img.shape\n        results['pad_shape'] = img.shape\n        results['trans_mat'] = trans_mat\n\n        self._affine_bboxes(results, trans_affine)\n\n        if 'centers2d' in results:\n            centers2d = self._affine_transform(results['centers2d'],\n                                               trans_affine)\n            valid_index = (centers2d[:, 0] >\n                           0) & (centers2d[:, 0] <\n                                 self.img_scale[0]) & (centers2d[:, 1] > 0) & (\n                                     centers2d[:, 1] < self.img_scale[1])\n            results['centers2d'] = centers2d[valid_index]\n\n            for key in results.get('bbox_fields', []):\n                if key in ['gt_bboxes']:\n                    results[key] = results[key][valid_index]\n                    if 'gt_labels' in results:\n                        results['gt_labels'] = results['gt_labels'][\n                            valid_index]\n                    if 'gt_masks' in results:\n                        raise NotImplementedError(\n                            'AffineResize only supports bbox.')\n\n            for key in results.get('bbox3d_fields', []):\n                if key in ['gt_bboxes_3d']:\n                    results[key].tensor = results[key].tensor[valid_index]\n                    if 'gt_labels_3d' in results:\n                        results['gt_labels_3d'] = results['gt_labels_3d'][\n                            valid_index]\n\n            results['depths'] = results['depths'][valid_index]\n\n        return results\n\n    def _affine_bboxes(self, results, matrix):\n        \"\"\"Affine 
transform bboxes to input image.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n            matrix (np.ndarray): Matrix transforming original\n                image to the network input image size.\n                shape: (3, 3)\n        \"\"\"\n\n        for key in results.get('bbox_fields', []):\n            bboxes = results[key]\n            bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)\n            bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)\n            if self.bbox_clip_border:\n                bboxes[:,\n                       [0, 2]] = bboxes[:,\n                                        [0, 2]].clip(0, self.img_scale[0] - 1)\n                bboxes[:,\n                       [1, 3]] = bboxes[:,\n                                        [1, 3]].clip(0, self.img_scale[1] - 1)\n            results[key] = bboxes\n\n    def _affine_transform(self, points, matrix):\n        \"\"\"Affine transform bbox points to input image.\n\n        Args:\n            points (np.ndarray): Points to be transformed.\n                shape: (N, 2)\n            matrix (np.ndarray): Affine transform matrix.\n                shape: (3, 3)\n\n        Returns:\n            np.ndarray: Transformed points.\n        \"\"\"\n        num_points = points.shape[0]\n        hom_points_2d = np.concatenate((points, np.ones((num_points, 1))),\n                                       axis=1)\n        hom_points_2d = hom_points_2d.T\n        affined_points = np.matmul(matrix, hom_points_2d).T\n        return affined_points[:, :2]\n\n    def _get_transform_matrix(self, center, scale, output_scale):\n        \"\"\"Get affine transform matrix.\n\n        Args:\n            center (tuple): Center of current image.\n            scale (tuple): Scale of current image.\n            output_scale (tuple[float]): The transform target image scales.\n\n        Returns:\n            np.ndarray: Affine transform matrix.\n        \"\"\"\n        # TODO: further add rot and shift here.\n        src_w = scale[0]\n        dst_w = output_scale[0]\n        dst_h = output_scale[1]\n\n        src_dir = np.array([0, src_w * -0.5])\n        dst_dir = np.array([0, dst_w * -0.5])\n\n        src = np.zeros((3, 2), dtype=np.float32)\n        dst = np.zeros((3, 2), dtype=np.float32)\n        src[0, :] = center\n        src[1, :] = center + src_dir\n        dst[0, :] = np.array([dst_w * 0.5, dst_h * 0.5])\n        dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir\n\n        src[2, :] = self._get_ref_point(src[0, :], src[1, :])\n        dst[2, :] = self._get_ref_point(dst[0, :], dst[1, :])\n\n        get_matrix = cv2.getAffineTransform(src, dst)\n\n        matrix = np.concatenate((get_matrix, [[0., 0., 1.]]))\n\n        return matrix.astype(np.float32)\n\n    def _get_ref_point(self, ref_point1, ref_point2):\n        \"\"\"Get reference point to calculate affine transform matrix.\n\n        While using opencv to calculate the affine matrix, we need at least\n        three corresponding points separately on original image and target\n        image. 
Here we use two points to get the third reference point.\n        \"\"\"\n        d = ref_point1 - ref_point2\n        ref_point3 = ref_point2 + np.array([-d[1], d[0]])\n        return ref_point3\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(img_scale={self.img_scale}, '\n        repr_str += f'down_ratio={self.down_ratio}) '\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass RandomShiftScale(object):\n    \"\"\"Random shift scale.\n\n    Different from the normal shift and scale function, it doesn't\n    directly shift or scale the image. It only records the shift and\n    scale infos into the loading pipeline. It is designed to be used\n    together with AffineResize.\n\n    Args:\n        shift_scale (tuple[float]): Shift and scale range.\n        aug_prob (float): The shifting and scaling probability.\n    \"\"\"\n\n    def __init__(self, shift_scale, aug_prob):\n\n        self.shift_scale = shift_scale\n        self.aug_prob = aug_prob\n\n    def __call__(self, results):\n        \"\"\"Call function to record random shift and scale infos.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after random shift and scale, 'center', 'size'\n                and 'affine_aug' keys are added in the result dict.\n        \"\"\"\n        img = results['img']\n\n        height, width = img.shape[:2]\n\n        center = np.array([width / 2, height / 2], dtype=np.float32)\n        size = np.array([width, height], dtype=np.float32)\n\n        if random.random() < self.aug_prob:\n            shift, scale = self.shift_scale[0], self.shift_scale[1]\n            shift_ranges = np.arange(-shift, shift + 0.1, 0.1)\n            center[0] += size[0] * random.choice(shift_ranges)\n            center[1] += size[1] * random.choice(shift_ranges)\n            scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)\n            size *= random.choice(scale_ranges)\n            results['affine_aug'] = True\n        else:\n            results['affine_aug'] = False\n\n        results['center'] = center\n        results['size'] = size\n\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(shift_scale={self.shift_scale}, '\n        repr_str += f'aug_prob={self.aug_prob}) '\n        return repr_str\n"
  },
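# A minimal usage sketch for the RandomShiftScale + AffineResize pair defined in the
# file above: RandomShiftScale only records 'center'/'size'/'affine_aug' in `results`,
# and AffineResize then warps the image to `img_scale` and stores the input/output
# affine matrices. Assumptions: both classes are re-exported from
# mmdet3d.datasets.pipelines (as in upstream mmdet3d), and the dummy image and the
# chosen scales below are purely hypothetical.
import numpy as np

from mmdet3d.datasets.pipelines import AffineResize, RandomShiftScale

results = dict(img=np.random.randint(0, 255, (375, 1242, 3), dtype=np.uint8))

shift_scale = RandomShiftScale(shift_scale=(0.2, 0.4), aug_prob=0.3)
affine_resize = AffineResize(img_scale=(1280, 384), down_ratio=4)

results = affine_resize(shift_scale(results))
print(results['img'].shape)        # (384, 1280, 3): warped network input image
print(results['trans_mat'].shape)  # (3, 3): original image -> 1/4-scale feature map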
  {
    "path": "mmdet3d/datasets/s3dis_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nimport numpy as np\n\nfrom mmdet3d.core import show_seg_result\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmseg.datasets import DATASETS as SEG_DATASETS\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\nfrom .custom_3d_seg import Custom3DSegDataset\nfrom .pipelines import Compose\n\n\n@DATASETS.register_module()\nclass S3DISDataset(Custom3DDataset):\n    r\"\"\"S3DIS Dataset for Detection Task.\n\n    This class is the inner dataset for S3DIS. Since S3DIS has 6 areas, we\n    often train on 5 of them and test on the remaining one. The one for\n    test is Area_5 as suggested in `GSDN <https://arxiv.org/abs/2006.12356>`_.\n    To concatenate 5 areas during training\n    `mmdet.datasets.dataset_wrappers.ConcatDataset` should be used.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'Depth' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('table', 'chair', 'sofa', 'bookcase', 'board')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='Depth',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 *kwargs):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            *kwargs)\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - pts_instance_mask_path (str): Path of instance masks.\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        if info['annos']['gt_num'] != 0:\n            gt_bboxes_3d = 
info['annos']['gt_boxes_upright_depth'].astype(\n                np.float32)  # k, 6\n            gt_labels_3d = info['annos']['class'].astype(np.int64)\n        else:\n            gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)\n            gt_labels_3d = np.zeros((0, ), dtype=np.int64)\n\n        # to target box structure\n        gt_bboxes_3d = DepthInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            with_yaw=False,\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        pts_instance_mask_path = osp.join(self.data_root,\n                                          info['pts_instance_mask_path'])\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            pts_instance_mask_path=pts_instance_mask_path,\n            pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. It includes the following keys:\n\n                - pts_filename (str): Filename of point clouds.\n                - file_name (str): Filename of point clouds.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        pts_filename = osp.join(self.data_root, info['pts_path'])\n        input_dict = dict(pts_filename=pts_filename)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():\n                return None\n        return input_dict\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='DEPTH',\n                shift_height=False,\n                load_dim=6,\n                use_dim=[0, 1, 2, 3, 4, 5]),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ]\n        return Compose(pipeline)\n\n\nclass _S3DISSegDataset(Custom3DSegDataset):\n    r\"\"\"S3DIS Dataset for Semantic Segmentation Task.\n\n    This class is the inner dataset for S3DIS. Since S3DIS has 6 areas, we\n    often train on 5 of them and test on the remaining one.\n    However, there is not a fixed train-test split of S3DIS. People often test\n    on Area_5 as suggested by `SEGCloud <https://arxiv.org/abs/1710.07563>`_.\n    But many papers also report the average results of 6-fold cross validation\n    over the 6 areas (e.g. 
`DGCNN <https://arxiv.org/abs/1801.07829>`_).\n    Therefore, we use an inner dataset for one area, and further use a dataset\n    wrapper to concat all the provided data in different areas.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        palette (list[list[int]], optional): The palette of segmentation map.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        ignore_index (int, optional): The label index to be ignored, e.g.\n            unannotated points. If None is given, set to len(self.CLASSES).\n            Defaults to None.\n        scene_idxs (np.ndarray | str, optional): Precomputed index to load\n            data. For scenes with many points, we may sample it several times.\n            Defaults to None.\n    \"\"\"\n    CLASSES = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',\n               'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')\n\n    VALID_CLASS_IDS = tuple(range(13))\n\n    ALL_CLASS_IDS = tuple(range(14))  # possibly with 'stair' class\n\n    PALETTE = [[0, 255, 0], [0, 0, 255], [0, 255, 255], [255, 255, 0],\n               [255, 0, 255], [100, 100, 255], [200, 200, 100],\n               [170, 120, 200], [255, 0, 0], [200, 100, 100], [10, 200, 100],\n               [200, 200, 200], [50, 50, 50]]\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 palette=None,\n                 modality=None,\n                 test_mode=False,\n                 ignore_index=None,\n                 scene_idxs=None,\n                 **kwargs):\n\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            palette=palette,\n            modality=modality,\n            test_mode=test_mode,\n            ignore_index=ignore_index,\n            scene_idxs=scene_idxs,\n            **kwargs)\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = dict(pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='DEPTH',\n                shift_height=False,\n                use_color=True,\n                load_dim=6,\n                use_dim=[0, 1, 2, 
3, 4, 5]),\n            dict(\n                type='LoadAnnotations3D',\n                with_bbox_3d=False,\n                with_label_3d=False,\n                with_mask_3d=False,\n                with_seg_3d=True),\n            dict(\n                type='PointSegClassMapping',\n                valid_cat_ids=self.VALID_CLASS_IDS,\n                max_cat_id=np.max(self.ALL_CLASS_IDS)),\n            dict(\n                type='DefaultFormatBundle3D',\n                with_label=False,\n                class_names=self.CLASSES),\n            dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n        ]\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=True, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Visualize the results online.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            data_info = self.data_infos[i]\n            pts_path = data_info['pts_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points, gt_sem_mask = self._extract_data(\n                i, pipeline, ['points', 'pts_semantic_mask'], load_annos=True)\n            points = points.numpy()\n            pred_sem_mask = result['semantic_mask'].numpy()\n            show_seg_result(points, gt_sem_mask,\n                            pred_sem_mask, out_dir, file_name,\n                            np.array(self.PALETTE), self.ignore_index, show)\n\n    def get_scene_idxs(self, scene_idxs):\n        \"\"\"Compute scene_idxs for data sampling.\n\n        We sample more times for scenes with more points.\n        \"\"\"\n        # when testing, we load one whole scene every time\n        if not self.test_mode and scene_idxs is None:\n            raise NotImplementedError(\n                'please provide re-sampled scene indexes for training')\n\n        return super().get_scene_idxs(scene_idxs)\n\n\n@DATASETS.register_module()\n@SEG_DATASETS.register_module()\nclass S3DISSegDataset(_S3DISSegDataset):\n    r\"\"\"S3DIS Dataset for Semantic Segmentation Task.\n\n    This class serves as the API for experiments on the S3DIS Dataset.\n    It wraps the provided datasets of different areas.\n    We don't use `mmdet.datasets.dataset_wrappers.ConcatDataset` because we\n    need to concat the `scene_idxs` of different areas.\n\n    Please refer to the `google form <https://docs.google.com/forms/d/e/1FAIpQL\n    ScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1>`_ for\n    data downloading.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_files (list[str]): Path of several annotation files.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        palette (list[list[int]], optional): The palette of segmentation map.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. 
Defaults to None.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        ignore_index (int, optional): The label index to be ignored, e.g.\n            unannotated points. If None is given, set to len(self.CLASSES).\n            Defaults to None.\n        scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index\n            to load data. For scenes with many points, we may sample it several\n            times. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 data_root,\n                 ann_files,\n                 pipeline=None,\n                 classes=None,\n                 palette=None,\n                 modality=None,\n                 test_mode=False,\n                 ignore_index=None,\n                 scene_idxs=None,\n                 **kwargs):\n\n        # make sure that ann_files and scene_idxs have same length\n        ann_files = self._check_ann_files(ann_files)\n        scene_idxs = self._check_scene_idxs(scene_idxs, len(ann_files))\n\n        # initialize some attributes as datasets[0]\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_files[0],\n            pipeline=pipeline,\n            classes=classes,\n            palette=palette,\n            modality=modality,\n            test_mode=test_mode,\n            ignore_index=ignore_index,\n            scene_idxs=scene_idxs[0],\n            **kwargs)\n\n        datasets = [\n            _S3DISSegDataset(\n                data_root=data_root,\n                ann_file=ann_files[i],\n                pipeline=pipeline,\n                classes=classes,\n                palette=palette,\n                modality=modality,\n                test_mode=test_mode,\n                ignore_index=ignore_index,\n                scene_idxs=scene_idxs[i],\n                **kwargs) for i in range(len(ann_files))\n        ]\n\n        # data_infos and scene_idxs need to be concat\n        self.concat_data_infos([dst.data_infos for dst in datasets])\n        self.concat_scene_idxs([dst.scene_idxs for dst in datasets])\n\n        # set group flag for the sampler\n        if not self.test_mode:\n            self._set_group_flag()\n\n    def concat_data_infos(self, data_infos):\n        \"\"\"Concat data_infos from several datasets to form self.data_infos.\n\n        Args:\n            data_infos (list[list[dict]])\n        \"\"\"\n        self.data_infos = [\n            info for one_data_infos in data_infos for info in one_data_infos\n        ]\n\n    def concat_scene_idxs(self, scene_idxs):\n        \"\"\"Concat scene_idxs from several datasets to form self.scene_idxs.\n\n        Needs to manually add offset to scene_idxs[1, 2, ...].\n\n        Args:\n            scene_idxs (list[np.ndarray])\n        \"\"\"\n        self.scene_idxs = np.array([], dtype=np.int32)\n        offset = 0\n        for one_scene_idxs in scene_idxs:\n            self.scene_idxs = np.concatenate(\n                [self.scene_idxs, one_scene_idxs + offset]).astype(np.int32)\n            offset = np.unique(self.scene_idxs).max() + 1\n\n    @staticmethod\n    def _duplicate_to_list(x, num):\n        \"\"\"Repeat x `num` times to form a list.\"\"\"\n        return [x for _ in range(num)]\n\n    def _check_ann_files(self, ann_file):\n        \"\"\"Make ann_files as list/tuple.\"\"\"\n        # ann_file could be str\n        if not isinstance(ann_file, (list, tuple)):\n            ann_file = self._duplicate_to_list(ann_file, 1)\n       
 return ann_file\n\n    def _check_scene_idxs(self, scene_idx, num):\n        \"\"\"Make scene_idxs as list/tuple.\"\"\"\n        if scene_idx is None:\n            return self._duplicate_to_list(scene_idx, num)\n        # scene_idx could be str, np.ndarray, list or tuple\n        if isinstance(scene_idx, str):  # str\n            return self._duplicate_to_list(scene_idx, num)\n        if isinstance(scene_idx[0], str):  # list of str\n            return scene_idx\n        if isinstance(scene_idx[0], (list, tuple, np.ndarray)):  # list of idx\n            return scene_idx\n        # single idx\n        return self._duplicate_to_list(scene_idx, num)\n"
  },
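# A minimal config sketch for the two S3DIS entry points above, with pipelines omitted
# for brevity. Detection concatenates one S3DISDataset per training area through
# mmdet's ConcatDataset wrapper (as the class docstring suggests), while
# S3DISSegDataset concatenates the per-area ann_files and scene_idxs itself.
# The info/scene-index file names and data_root below follow the usual mmdet3d
# data-preparation layout and are assumptions, not paths taken from this repository.
train_areas = [1, 2, 3, 4, 6]  # test on Area_5

det_train = dict(
    type='ConcatDataset',
    datasets=[
        dict(
            type='S3DISDataset',
            data_root='./data/s3dis/',
            ann_file=f'./data/s3dis/s3dis_infos_Area_{i}.pkl')
        for i in train_areas
    ])

seg_train = dict(
    type='S3DISSegDataset',
    data_root='./data/s3dis/',
    ann_files=[f'./data/s3dis/s3dis_infos_Area_{i}.pkl' for i in train_areas],
    scene_idxs=[
        f'./data/s3dis/seg_info/Area_{i}_resampled_scene_idxs.npy'
        for i in train_areas
    ])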
  {
    "path": "mmdet3d/datasets/samplers/__init__.py",
    "content": "from .infinite_group_each_sample_in_batch_sampler import InfiniteGroupEachSampleInBatchSampler, InfiniteGroupEachSampleInBatchSamplerEval, TTADistributedSampler\nfrom .d_sampler import CustomDistributedSampler"
  },
  {
    "path": "mmdet3d/datasets/samplers/d_sampler.py",
    "content": "import math\n\nimport torch\nfrom torch.utils.data import DistributedSampler as _DistributedSampler\nfrom torch.utils.data.sampler import Sampler\nclass CustomDistributedSampler(_DistributedSampler):\n\n    def __init__(self,\n                 dataset=None,\n                 num_replicas=None,\n                 rank=None,\n                 shuffle=False,\n                 seed=0):\n        super().__init__(\n            dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)\n        # for the compatibility from PyTorch 1.3+\n        self.seed = seed if seed is not None else 0\n\n    def __iter__(self):\n        # deterministically shuffle based on epoch\n        if self.shuffle:\n            assert False\n        else:\n            indices = torch.arange(len(self.dataset)).tolist()\n\n        # add extra samples to make it evenly divisible\n        # in case that indices is shorter than half of total_size\n        indices = (indices *\n                   math.ceil(self.total_size / len(indices)))[:self.total_size]\n        assert len(indices) == self.total_size\n\n        # subsample\n        per_replicas = self.total_size//self.num_replicas\n        # indices = indices[self.rank:self.total_size:self.num_replicas]\n        indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas]\n        assert len(indices) == self.num_samples\n\n        return iter(indices)\n\n\n \n\n"
  },
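# A small pure-Python check of the contiguous-slab subsampling used by
# CustomDistributedSampler above: the index list is padded to total_size and each
# rank takes one consecutive block of total_size // num_replicas samples, instead
# of the usual rank::num_replicas stride. The dataset size and replica count are
# made-up numbers for illustration.
import math

dataset_len, num_replicas = 10, 4
num_samples = math.ceil(dataset_len / num_replicas)  # 3 samples per rank
total_size = num_samples * num_replicas              # 12 after padding

indices = list(range(dataset_len))
indices = (indices * math.ceil(total_size / len(indices)))[:total_size]

per_replica = total_size // num_replicas
for rank in range(num_replicas):
    print(rank, indices[rank * per_replica:(rank + 1) * per_replica])
# 0 [0, 1, 2]
# 1 [3, 4, 5]
# 2 [6, 7, 8]
# 3 [9, 0, 1]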
  {
    "path": "mmdet3d/datasets/samplers/infinite_group_each_sample_in_batch_sampler.py",
    "content": "\n \n\nimport itertools\nimport copy\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import get_dist_info\nfrom torch.utils.data.sampler import Sampler\n\n# https://github.com/open-mmlab/mmdetection/blob/3b72b12fe9b14de906d1363982b9fba05e7d47c1/mmdet/core/utils/dist_utils.py#L157\ndef sync_random_seed(seed=None, device='cuda'):\n    \"\"\"Make sure different ranks share the same seed.\n    All workers must call this function, otherwise it will deadlock.\n    This method is generally used in `DistributedSampler`,\n    because the seed should be identical across all processes\n    in the distributed group.\n    In distributed sampling, different ranks should sample non-overlapped\n    data in the dataset. Therefore, this function is used to make sure that\n    each rank shuffles the data indices in the same order based\n    on the same seed. Then different ranks could use different indices\n    to select non-overlapped data from the same data list.\n    Args:\n        seed (int, Optional): The seed. Default to None.\n        device (str): The device where the seed will be put on.\n            Default to 'cuda'.\n    Returns:\n        int: Seed to be used.\n    \"\"\"\n    if seed is None:\n        seed = np.random.randint(2**31)\n    assert isinstance(seed, int)\n\n    rank, world_size = get_dist_info()\n\n    if world_size == 1:\n        return seed\n\n    if rank == 0:\n        random_num = torch.tensor(seed, dtype=torch.int32, device=device)\n    else:\n        random_num = torch.tensor(0, dtype=torch.int32, device=device)\n    dist.broadcast(random_num, src=0)\n    return random_num.item()\n\nclass InfiniteGroupEachSampleInBatchSampler(Sampler):\n    \"\"\"\n    Pardon this horrendous name. Basically, we want every sample to be from its own group.\n    If batch size is 4 and # of GPUs is 8, each sample of these 32 should be operating on\n    its own group.\n    Shuffling is only done for group order, not done within groups.\n    \"\"\"\n\n    def __init__(self, \n                 dataset,\n                 batch_size=1,\n                 world_size=None,\n                 rank=None,\n                 seed=0):\n\n        _rank, _world_size = get_dist_info()\n        if world_size is None:\n            world_size = _world_size\n        if rank is None:\n            rank = _rank\n\n        self.dataset = dataset\n        self.batch_size = batch_size\n        self.world_size = world_size\n        self.rank = rank\n        self.seed = sync_random_seed(seed)\n\n        self.size = len(self.dataset)\n\n        assert hasattr(self.dataset, 'flag')\n        self.flag = self.dataset.flag\n        self.group_sizes = np.bincount(self.flag)\n        self.groups_num = len(self.group_sizes)\n        self.global_batch_size = batch_size * world_size\n\n        assert self.groups_num >= self.global_batch_size\n\n        # Now, for efficiency, make a dict group_idx: List[dataset sample_idxs]\n        self.group_idx_to_sample_idxs = {\n            group_idx: np.where(self.flag == group_idx)[0].tolist()\n            for group_idx in range(self.groups_num)}        \n\n        # Get a generator per sample idx. 
Considering samples over all\n        # GPUs, each sample position has its own generator \n        self.group_indices_per_global_sample_idx = [\n            self._group_indices_per_global_sample_idx(self.rank * self.batch_size + local_sample_idx) \n            for local_sample_idx in range(self.batch_size)]\n        \n        # Keep track of a buffer of dataset sample idxs for each local sample idx\n        self.buffer_per_local_sample = [[] for _ in range(self.batch_size)]\n\n    def _infinite_group_indices(self):\n        g = torch.Generator()\n        g.manual_seed(self.seed)\n        while True:\n            yield from torch.randperm(self.groups_num, generator=g).tolist()\n\n    def _group_indices_per_global_sample_idx(self, global_sample_idx):\n        yield from itertools.islice(self._infinite_group_indices(), \n                                    global_sample_idx, \n                                    None,\n                                    self.global_batch_size)\n\n    def __iter__(self):\n        while True:\n            curr_batch = []\n            for local_sample_idx in range(self.batch_size):\n                if len(self.buffer_per_local_sample[local_sample_idx]) == 0:\n                    # Finished current group, refill with next group\n                    new_group_idx = next(self.group_indices_per_global_sample_idx[local_sample_idx])\n                    self.buffer_per_local_sample[local_sample_idx] = \\\n                        copy.deepcopy(\n                            self.group_idx_to_sample_idxs[new_group_idx])\n\n                curr_batch.append(self.buffer_per_local_sample[local_sample_idx].pop(0))\n            \n            yield curr_batch\n\n    def __len__(self):\n        \"\"\"Length of base dataset.\"\"\"\n        return self.size\n        \n    def set_epoch(self, epoch):\n        self.epoch = epoch\n\n\nclass InfiniteGroupEachSampleInBatchSamplerEval(Sampler):\n    \"\"\"\n    Pardon this horrendous name. Basically, we want every sample to be from its own group.\n    If batch size is 4 and # of GPUs is 8, each sample of these 32 should be operating on\n    its own group.\n    Shuffling is only done for group order, not done within groups.\n    \"\"\"\n\n    def __init__(self, \n                 dataset,\n                 batch_size=1,\n                 world_size=None,\n                 rank=None,\n                 seed=0):\n\n        _rank, _world_size = get_dist_info()\n        if world_size is None:\n            world_size = _world_size\n        if rank is None:\n            rank = _rank\n\n        self.dataset = dataset\n        self.batch_size = batch_size\n        self.world_size = world_size\n        self.rank = rank\n        self.seed = sync_random_seed(seed)\n\n        self.size = len(self.dataset)\n\n        assert hasattr(self.dataset, 'flag')\n        self.flag = self.dataset.flag\n        self.group_sizes = np.bincount(self.flag)\n        self.groups_num = len(self.group_sizes) \n        self.global_batch_size = batch_size * world_size\n        assert self.groups_num >= self.global_batch_size\n\n        # Now, for efficiency, make a dict group_idx: List[dataset sample_idxs]\n        self.group_idx_to_sample_idxs = {\n            group_idx: np.where(self.flag == group_idx)[0].tolist()\n            for group_idx in range(self.groups_num)}        \n\n        # Get a generator per sample idx. 
Considering samples over all\n        # GPUs, each sample position has its own generator \n        self.group_indices_per_global_sample_idx = [\n            self._group_indices_per_global_sample_idx(self.rank * self.batch_size + local_sample_idx) \n            for local_sample_idx in range(self.batch_size)]\n        \n        # Keep track of a buffer of dataset sample idxs for each local sample idx\n        self.buffer_per_local_sample = [[] for _ in range(self.batch_size)]\n\n    def _infinite_group_indices(self):\n        g = torch.Generator()\n        g.manual_seed(self.seed)\n        while True:\n            yield from torch.randperm(self.groups_num, generator=g).tolist()\n\n    def _group_indices_per_global_sample_idx(self, global_sample_idx):\n        yield from itertools.islice(self._infinite_group_indices(), \n                                    global_sample_idx, \n                                    None,\n                                    self.global_batch_size)\n\n    def __iter__(self):\n\n        t = (len(self.flag)+self.world_size*16 + 1)//self.world_size\n        for i in range(t):\n            if i == 0: self.buffer_per_local_sample = [[] for _ in range(self.batch_size)]\n            curr_batch = []\n            for local_sample_idx in range(self.batch_size):\n                if len(self.buffer_per_local_sample[local_sample_idx]) == 0:\n                    # Finished current group, refill with next group\n                    new_group_idx = next(self.group_indices_per_global_sample_idx[local_sample_idx])\n                    self.buffer_per_local_sample[local_sample_idx] = \\\n                        copy.deepcopy(\n                            self.group_idx_to_sample_idxs[new_group_idx])\n\n                curr_batch.append(self.buffer_per_local_sample[local_sample_idx].pop(0))\n            \n            yield curr_batch\n\n    def __len__(self):\n        \"\"\"Length of base dataset.\"\"\"\n        return self.size\n        \n    def set_epoch(self, epoch):\n        self.epoch = epoch\n\n\n\nclass TTADistributedSampler(Sampler):\n\n    def __init__(self,\n                 dataset,\n                 batch_size=1,\n                 world_size=None,\n                 rank=None,\n                 seed=0):\n        _rank, _world_size = get_dist_info()\n        if world_size is None:\n            world_size = _world_size\n        if rank is None:\n            rank = _rank\n\n        self.dataset = dataset\n        assert batch_size == 1\n        self.batch_size = batch_size\n        self.world_size = world_size\n        self.rank = rank\n        self.seed = sync_random_seed(seed)\n\n        self.size = len(self.dataset)\n\n    def __iter__(self):\n        indices = torch.arange(len(self.dataset)).tolist()\n        for i in indices:\n            yield [i]\n\n    def __len__(self):\n        \"\"\"Length of base dataset.\"\"\"\n        return self.size * 8"
  },
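# A minimal usage sketch for InfiniteGroupEachSampleInBatchSampler defined above: it
# yields lists of dataset indices, so it is passed to a DataLoader as `batch_sampler`.
# Each of the batch_size * world_size sample slots keeps pulling consecutive frames
# from "its own" group (dataset.flag), which preserves temporal order per slot.
# The toy dataset below is hypothetical; the import path follows
# mmdet3d/datasets/samplers/__init__.py above.
import numpy as np
from torch.utils.data import DataLoader, Dataset

from mmdet3d.datasets.samplers import InfiniteGroupEachSampleInBatchSampler


class ToyGroupedDataset(Dataset):
    """Eight samples split into four groups of two consecutive frames."""

    def __init__(self):
        self.flag = np.array([0, 0, 1, 1, 2, 2, 3, 3], dtype=np.int64)

    def __len__(self):
        return len(self.flag)

    def __getitem__(self, idx):
        return idx


dataset = ToyGroupedDataset()
sampler = InfiniteGroupEachSampleInBatchSampler(dataset, batch_size=2)
loader = DataLoader(dataset, batch_sampler=sampler, num_workers=0)

# The sampler is infinite, so only take a few batches for illustration.
for step, batch in zip(range(3), loader):
    print(step, batch.tolist())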
  {
    "path": "mmdet3d/datasets/scannet_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport tempfile\nimport warnings\nfrom os import path as osp\n\nimport numpy as np\n\nfrom mmdet3d.core import instance_seg_eval, show_result, show_seg_result\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmseg.datasets import DATASETS as SEG_DATASETS\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\nfrom .custom_3d_seg import Custom3DSegDataset\nfrom .pipelines import Compose\n\n\n@DATASETS.register_module()\nclass ScanNetDataset(Custom3DDataset):\n    r\"\"\"ScanNet Dataset for Detection Task.\n\n    This class serves as the API for experiments on the ScanNet Dataset.\n\n    Please refer to the `github repo <https://github.com/ScanNet/ScanNet>`_\n    for data downloading.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'Depth' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n               'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n               'garbagebin')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=dict(use_camera=False, use_depth=True),\n                 box_type_3d='Depth',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 **kwargs):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            **kwargs)\n        assert 'use_camera' in self.modality and \\\n               'use_depth' in self.modality\n        assert self.modality['use_camera'] or self.modality['use_depth']\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - file_name (str): Filename of point clouds.\n                - img_prefix (str, optional): Prefix of image files.\n                - img_info (dict, optional): Image info.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['point_cloud']['lidar_idx']\n        pts_filename = osp.join(self.data_root, info['pts_path'])\n        input_dict = dict(sample_idx=sample_idx)\n\n        if self.modality['use_depth']:\n            input_dict['pts_filename'] = pts_filename\n            input_dict['file_name'] = pts_filename\n\n        if self.modality['use_camera']:\n            img_info = []\n            for img_path in info['img_paths']:\n                img_info.append(\n                    dict(filename=osp.join(self.data_root, img_path)))\n            intrinsic = info['intrinsics']\n            axis_align_matrix = self._get_axis_align_matrix(info)\n            depth2img = []\n            for extrinsic in info['extrinsics']:\n                depth2img.append(\n                    intrinsic @ np.linalg.inv(axis_align_matrix @ extrinsic))\n\n            input_dict['img_prefix'] = None\n            input_dict['img_info'] = img_info\n            input_dict['depth2img'] = depth2img\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():\n                return None\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - pts_instance_mask_path (str): Path of instance masks.\n                - pts_semantic_mask_path (str): Path of semantic masks.\n                - axis_align_matrix (np.ndarray): Transformation matrix for\n                    global scene alignment.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        if info['annos']['gt_num'] != 0:\n            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(\n                np.float32)  # k, 6\n            gt_labels_3d = info['annos']['class'].astype(np.int64)\n        else:\n            gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)\n            gt_labels_3d = np.zeros((0, ), dtype=np.int64)\n\n        # to target box structure\n        gt_bboxes_3d = DepthInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            with_yaw=False,\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        pts_instance_mask_path = osp.join(self.data_root,\n                                          info['pts_instance_mask_path'])\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        axis_align_matrix = self._get_axis_align_matrix(info)\n\n        anns_results = dict(\n            
gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            pts_instance_mask_path=pts_instance_mask_path,\n            pts_semantic_mask_path=pts_semantic_mask_path,\n            axis_align_matrix=axis_align_matrix)\n        return anns_results\n\n    def prepare_test_data(self, index):\n        \"\"\"Prepare data for testing.\n\n        We should take axis_align_matrix from self.data_infos since we need\n            to align point clouds.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Testing data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        # take the axis_align_matrix from data_infos\n        input_dict['ann_info'] = dict(\n            axis_align_matrix=self._get_axis_align_matrix(\n                self.data_infos[index]))\n        self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        return example\n\n    @staticmethod\n    def _get_axis_align_matrix(info):\n        \"\"\"Get axis_align_matrix from info. If not exist, return identity mat.\n\n        Args:\n            info (dict): one data info term.\n\n        Returns:\n            np.ndarray: 4x4 transformation matrix.\n        \"\"\"\n        if 'axis_align_matrix' in info['annos'].keys():\n            return info['annos']['axis_align_matrix'].astype(np.float32)\n        else:\n            warnings.warn(\n                'axis_align_matrix is not found in ScanNet data info, please '\n                'use new pre-process scripts to re-generate ScanNet data')\n            return np.eye(4).astype(np.float32)\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='DEPTH',\n                shift_height=False,\n                load_dim=6,\n                use_dim=[0, 1, 2]),\n            dict(type='GlobalAlignment', rotation_axis=2),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ]\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=True, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Visualize the results online.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            data_info = self.data_infos[i]\n            pts_path = data_info['pts_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points = self._extract_data(i, pipeline, 'points').numpy()\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()\n            pred_bboxes = result['boxes_3d'].tensor.numpy()\n            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,\n                        show)\n\n\n@DATASETS.register_module()\n@SEG_DATASETS.register_module()\nclass ScanNetSegDataset(Custom3DSegDataset):\n    r\"\"\"ScanNet Dataset for Semantic Segmentation Task.\n\n   
 This class serves as the API for experiments on the ScanNet Dataset.\n\n    Please refer to the `github repo <https://github.com/ScanNet/ScanNet>`_\n    for data downloading.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        palette (list[list[int]], optional): The palette of segmentation map.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        ignore_index (int, optional): The label index to be ignored, e.g.\n            unannotated points. If None is given, set to len(self.CLASSES).\n            Defaults to None.\n        scene_idxs (np.ndarray | str, optional): Precomputed index to load\n            data. For scenes with many points, we may sample it several times.\n            Defaults to None.\n    \"\"\"\n    CLASSES = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',\n               'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',\n               'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',\n               'bathtub', 'otherfurniture')\n\n    VALID_CLASS_IDS = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,\n                       33, 34, 36, 39)\n\n    ALL_CLASS_IDS = tuple(range(41))\n\n    PALETTE = [\n        [174, 199, 232],\n        [152, 223, 138],\n        [31, 119, 180],\n        [255, 187, 120],\n        [188, 189, 34],\n        [140, 86, 75],\n        [255, 152, 150],\n        [214, 39, 40],\n        [197, 176, 213],\n        [148, 103, 189],\n        [196, 156, 148],\n        [23, 190, 207],\n        [247, 182, 210],\n        [219, 219, 141],\n        [255, 127, 14],\n        [158, 218, 229],\n        [44, 160, 44],\n        [112, 128, 144],\n        [227, 119, 194],\n        [82, 84, 163],\n    ]\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 palette=None,\n                 modality=None,\n                 test_mode=False,\n                 ignore_index=None,\n                 scene_idxs=None,\n                 **kwargs):\n\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            palette=palette,\n            modality=modality,\n            test_mode=test_mode,\n            ignore_index=ignore_index,\n            scene_idxs=scene_idxs,\n            **kwargs)\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = 
dict(pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='DEPTH',\n                shift_height=False,\n                use_color=True,\n                load_dim=6,\n                use_dim=[0, 1, 2, 3, 4, 5]),\n            dict(\n                type='LoadAnnotations3D',\n                with_bbox_3d=False,\n                with_label_3d=False,\n                with_mask_3d=False,\n                with_seg_3d=True),\n            dict(\n                type='PointSegClassMapping',\n                valid_cat_ids=self.VALID_CLASS_IDS,\n                max_cat_id=np.max(self.ALL_CLASS_IDS)),\n            dict(\n                type='DefaultFormatBundle3D',\n                with_label=False,\n                class_names=self.CLASSES),\n            dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n        ]\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=True, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Visualize the results online.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            data_info = self.data_infos[i]\n            pts_path = data_info['pts_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points, gt_sem_mask = self._extract_data(\n                i, pipeline, ['points', 'pts_semantic_mask'], load_annos=True)\n            points = points.numpy()\n            pred_sem_mask = result['semantic_mask'].numpy()\n            show_seg_result(points, gt_sem_mask,\n                            pred_sem_mask, out_dir, file_name,\n                            np.array(self.PALETTE), self.ignore_index, show)\n\n    def get_scene_idxs(self, scene_idxs):\n        \"\"\"Compute scene_idxs for data sampling.\n\n        We sample more times for scenes with more points.\n        \"\"\"\n        # when testing, we load one whole scene every time\n        if not self.test_mode and scene_idxs is None:\n            raise NotImplementedError(\n                'please provide re-sampled scene indexes for training')\n\n        return super().get_scene_idxs(scene_idxs)\n\n    def format_results(self, results, txtfile_prefix=None):\n        r\"\"\"Format the results to txt file. Refer to `ScanNet documentation\n        <http://kaldir.vc.in.tum.de/scannet_benchmark/documentation>`_.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            txtfile_prefix (str): The prefix of saved files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n\n        Returns:\n            tuple: (outputs, tmp_dir), outputs is the detection results,\n                tmp_dir is the temporary directory created for saving\n                submission files when ``submission_prefix`` is not specified.\n        \"\"\"\n        import mmcv\n\n        if txtfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            txtfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n        mmcv.mkdir_or_exist(txtfile_prefix)\n\n        # need to map network output to original label idx\n        pred2label = np.zeros(len(self.VALID_CLASS_IDS)).astype(np.int64)\n        for original_label, output_idx in self.label_map.items():\n            if output_idx != self.ignore_index:\n                pred2label[output_idx] = original_label\n\n        outputs = []\n        for i, result in enumerate(results):\n            info = self.data_infos[i]\n            sample_idx = info['point_cloud']['lidar_idx']\n            pred_sem_mask = result['semantic_mask'].numpy().astype(np.int64)\n            pred_label = pred2label[pred_sem_mask]\n            curr_file = f'{txtfile_prefix}/{sample_idx}.txt'\n            np.savetxt(curr_file, pred_label, fmt='%d')\n            outputs.append(dict(seg_mask=pred_label))\n\n        return outputs, tmp_dir\n\n\n@DATASETS.register_module()\n@SEG_DATASETS.register_module()\nclass ScanNetInstanceSegDataset(Custom3DSegDataset):\n    CLASSES = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n               'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n               'garbagebin')\n\n    VALID_CLASS_IDS = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,\n                       36, 39)\n\n    ALL_CLASS_IDS = tuple(range(41))\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n                - pts_semantic_mask_path (str): Path of semantic masks.\n                - pts_instance_mask_path (str): Path of instance masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n\n        pts_instance_mask_path = osp.join(self.data_root,\n                                          info['pts_instance_mask_path'])\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = dict(\n            pts_instance_mask_path=pts_instance_mask_path,\n            pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n\n    def get_classes_and_palette(self, classes=None, palette=None):\n        \"\"\"Get class names of current dataset. Palette is simply ignored for\n        instance segmentation.\n\n        Args:\n            classes (Sequence[str] | str | None): If classes is None, use\n                default CLASSES defined by builtin dataset. If classes is a\n                string, take it as a file name. The file contains the name of\n                classes where each line contains one class name. 
If classes is\n                a tuple or list, override the CLASSES defined by the dataset.\n                Defaults to None.\n            palette (Sequence[Sequence[int]]] | np.ndarray | None):\n                The palette of segmentation map. If None is given, random\n                palette will be generated. Defaults to None.\n        \"\"\"\n        if classes is not None:\n            return classes, None\n        return self.CLASSES, None\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='DEPTH',\n                shift_height=False,\n                use_color=True,\n                load_dim=6,\n                use_dim=[0, 1, 2, 3, 4, 5]),\n            dict(\n                type='LoadAnnotations3D',\n                with_bbox_3d=False,\n                with_label_3d=False,\n                with_mask_3d=True,\n                with_seg_3d=True),\n            dict(\n                type='PointSegClassMapping',\n                valid_cat_ids=self.VALID_CLASS_IDS,\n                max_cat_id=40),\n            dict(\n                type='DefaultFormatBundle3D',\n                with_label=False,\n                class_names=self.CLASSES),\n            dict(\n                type='Collect3D',\n                keys=['points', 'pts_semantic_mask', 'pts_instance_mask'])\n        ]\n        return Compose(pipeline)\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 options=None,\n                 logger=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in instance segmentation protocol.\n\n        Args:\n            results (list[dict]): List of results.\n            metric (str | list[str]): Metrics to be evaluated.\n            options (dict, optional): options for instance_seg_eval.\n            logger (logging.Logger | None | str): Logger used for printing\n                related information during evaluation. 
Defaults to None.\n            show (bool, optional): Whether to visualize.\n                Defaults to False.\n            out_dir (str, optional): Path to save the visualization results.\n                Defaults to None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict: Evaluation results.\n        \"\"\"\n        assert isinstance(\n            results, list), f'Expect results to be list, got {type(results)}.'\n        assert len(results) > 0, 'Expect length of results > 0.'\n        assert len(results) == len(self.data_infos)\n        assert isinstance(\n            results[0], dict\n        ), f'Expect elements in results to be dict, got {type(results[0])}.'\n\n        load_pipeline = self._get_pipeline(pipeline)\n        pred_instance_masks = [result['instance_mask'] for result in results]\n        pred_instance_labels = [result['instance_label'] for result in results]\n        pred_instance_scores = [result['instance_score'] for result in results]\n        gt_semantic_masks, gt_instance_masks = zip(*[\n            self._extract_data(\n                index=i,\n                pipeline=load_pipeline,\n                key=['pts_semantic_mask', 'pts_instance_mask'],\n                load_annos=True) for i in range(len(self.data_infos))\n        ])\n        ret_dict = instance_seg_eval(\n            gt_semantic_masks,\n            gt_instance_masks,\n            pred_instance_masks,\n            pred_instance_labels,\n            pred_instance_scores,\n            valid_class_ids=self.VALID_CLASS_IDS,\n            class_labels=self.CLASSES,\n            options=options,\n            logger=logger)\n\n        if show:\n            raise NotImplementedError('show is not implemented for now')\n\n        return ret_dict\n"
  },
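For orientation, here is a hedged, config-style sketch of how `ScanNetInstanceSegDataset` could be plugged into an mmdet3d-style config. The `data_root` and info-file name are illustrative assumptions (not taken from this repository's configs); the pipeline simply mirrors `_build_default_pipeline()` above.

```python
# Minimal sketch under assumed paths; the pipeline mirrors the dataset's
# _build_default_pipeline() shown above.
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
               'bookshelf', 'picture', 'counter', 'desk', 'curtain',
               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
               'garbagebin')
data_root = 'data/scannet/'  # assumed dataset location
train_pipeline = [
    dict(type='LoadPointsFromFile', coord_type='DEPTH', shift_height=False,
         use_color=True, load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadAnnotations3D', with_bbox_3d=False, with_label_3d=False,
         with_mask_3d=True, with_seg_3d=True),
    dict(type='PointSegClassMapping',
         valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                        33, 34, 36, 39),
         max_cat_id=40),
    dict(type='DefaultFormatBundle3D', with_label=False,
         class_names=class_names),
    dict(type='Collect3D',
         keys=['points', 'pts_semantic_mask', 'pts_instance_mask']),
]
train_dataset = dict(
    type='ScanNetInstanceSegDataset',
    data_root=data_root,
    ann_file=data_root + 'scannet_infos_train.pkl',  # assumed info-file name
    pipeline=train_pipeline)
```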
  {
    "path": "mmdet3d/datasets/semantickitti_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass SemanticKITTIDataset(Custom3DDataset):\n    r\"\"\"SemanticKITTI Dataset.\n\n    This class serves as the API for experiments on the SemanticKITTI Dataset\n    Please refer to <http://www.semantic-kitti.org/dataset.html>`_\n    for data downloading\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): NO 3D box for this dataset.\n            You can choose any type\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('unlabeled', 'car', 'bicycle', 'motorcycle', 'truck', 'bus',\n               'person', 'bicyclist', 'motorcyclist', 'road', 'parking',\n               'sidewalk', 'other-ground', 'building', 'fence', 'vegetation',\n               'trunck', 'terrian', 'pole', 'traffic-sign')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='Lidar',\n                 filter_empty_gt=False,\n                 test_mode=False):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. 
It includes the following keys:\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - file_name (str): Filename of point clouds.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['point_cloud']['lidar_idx']\n        pts_filename = osp.join(self.data_root, info['pts_path'])\n\n        input_dict = dict(\n            pts_filename=pts_filename,\n            sample_idx=sample_idx,\n            file_name=pts_filename)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():\n                return None\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = dict(pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n"
  },
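As a quick usage illustration for the class above, here is a hedged sketch of instantiating `SemanticKITTIDataset` and reading one sample's metadata. The paths are assumptions, and the import assumes the class is exported from `mmdet3d.datasets` as registered above.

```python
# Hedged usage sketch: data_root/ann_file are assumed paths, not paths used by
# this repository; get_data_info()/get_ann_info() behave as defined above.
from mmdet3d.datasets import SemanticKITTIDataset

dataset = SemanticKITTIDataset(
    data_root='data/semantickitti/',                               # assumed
    ann_file='data/semantickitti/semantickitti_infos_train.pkl',   # assumed
    pipeline=None)

sample = dataset.get_data_info(0)
# sample['pts_filename']                       -> path of the point cloud file
# sample['ann_info']['pts_semantic_mask_path'] -> path of the per-point labels
```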
  {
    "path": "mmdet3d/datasets/sunrgbd_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom collections import OrderedDict\nfrom os import path as osp\n\nimport numpy as np\n\nfrom mmdet3d.core import show_multi_modality_result, show_result\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet.core import eval_map\nfrom .builder import DATASETS\nfrom .custom_3d import Custom3DDataset\nfrom .pipelines import Compose\n\n\n@DATASETS.register_module()\nclass SUNRGBDDataset(Custom3DDataset):\n    r\"\"\"SUNRGBD Dataset.\n\n    This class serves as the API for experiments on the SUNRGBD Dataset.\n\n    See the `download page <http://rgbd.cs.princeton.edu/challenge.html>`_\n    for data downloading.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'Depth' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n               'night_stand', 'bookshelf', 'bathtub')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=dict(use_camera=True, use_lidar=True),\n                 box_type_3d='Depth',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 **kwargs):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            **kwargs)\n        assert 'use_camera' in self.modality and \\\n            'use_lidar' in self.modality\n        assert self.modality['use_camera'] or self.modality['use_lidar']\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str, optional): Filename of point clouds.\n                - file_name (str, optional): Filename of point clouds.\n                - img_prefix (str, optional): Prefix of image files.\n                - img_info (dict, optional): Image info.\n                - calib (dict, optional): Camera calibration info.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['point_cloud']['lidar_idx']\n        assert info['point_cloud']['lidar_idx'] == info['image']['image_idx']\n        input_dict = dict(sample_idx=sample_idx)\n\n        if self.modality['use_lidar']:\n            pts_filename = osp.join(self.data_root, info['pts_path'])\n            input_dict['pts_filename'] = pts_filename\n            input_dict['file_name'] = pts_filename\n\n        if self.modality['use_camera']:\n            img_filename = osp.join(\n                osp.join(self.data_root, 'sunrgbd_trainval'),\n                info['image']['image_path'])\n            input_dict['img_prefix'] = None\n            input_dict['img_info'] = dict(filename=img_filename)\n            calib = info['calib']\n            rt_mat = calib['Rt']\n            # follow Coord3DMode.convert_point\n            rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]\n                               ]) @ rt_mat.transpose(1, 0)\n            depth2img = calib['K'] @ rt_mat\n            input_dict['depth2img'] = depth2img\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:\n                return None\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - pts_instance_mask_path (str): Path of instance masks.\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        if info['annos']['gt_num'] != 0:\n            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(\n                np.float32)  # k, 6\n            gt_labels_3d = info['annos']['class'].astype(np.int64)\n        else:\n            gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)\n            gt_labels_3d = np.zeros((0, ), dtype=np.int64)\n\n        # to target box structure\n        gt_bboxes_3d = DepthInstance3DBoxes(\n            gt_bboxes_3d, origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)\n\n        if self.modality['use_camera']:\n            if info['annos']['gt_num'] != 0:\n                gt_bboxes_2d = info['annos']['bbox'].astype(np.float32)\n            else:\n                gt_bboxes_2d = np.zeros((0, 4), dtype=np.float32)\n            anns_results['bboxes'] = gt_bboxes_2d\n            anns_results['labels'] = gt_labels_3d\n\n        
return anns_results\n\n    def _build_default_pipeline(self):\n        \"\"\"Build the default pipeline for this dataset.\"\"\"\n        pipeline = [\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='DEPTH',\n                shift_height=False,\n                load_dim=6,\n                use_dim=[0, 1, 2]),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=self.CLASSES,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ]\n        if self.modality['use_camera']:\n            pipeline.insert(0, dict(type='LoadImageFromFile'))\n        return Compose(pipeline)\n\n    def show(self, results, out_dir, show=True, pipeline=None):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Visualize the results online.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        pipeline = self._get_pipeline(pipeline)\n        for i, result in enumerate(results):\n            data_info = self.data_infos[i]\n            pts_path = data_info['pts_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points, img_metas, img = self._extract_data(\n                i, pipeline, ['points', 'img_metas', 'img'])\n            # scale colors to [0, 255]\n            points = points.numpy()\n            points[:, 3:] *= 255\n\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()\n            pred_bboxes = result['boxes_3d'].tensor.numpy()\n            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,\n                        file_name, show)\n\n            # multi-modality visualization\n            if self.modality['use_camera']:\n                img = img.numpy()\n                # need to transpose channel to first dim\n                img = img.transpose(1, 2, 0)\n                pred_bboxes = DepthInstance3DBoxes(\n                    pred_bboxes, origin=(0.5, 0.5, 0))\n                gt_bboxes = DepthInstance3DBoxes(\n                    gt_bboxes, origin=(0.5, 0.5, 0))\n                show_multi_modality_result(\n                    img,\n                    gt_bboxes,\n                    pred_bboxes,\n                    None,\n                    out_dir,\n                    file_name,\n                    box_mode='depth',\n                    img_metas=img_metas,\n                    show=show)\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 iou_thr=(0.25, 0.5),\n                 iou_thr_2d=(0.5, ),\n                 logger=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluate.\n\n        Evaluation in indoor protocol.\n\n        Args:\n            results (list[dict]): List of results.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Default: None.\n            iou_thr (list[float], optional): AP IoU thresholds for 3D\n                evaluation. Default: (0.25, 0.5).\n            iou_thr_2d (list[float], optional): AP IoU thresholds for 2D\n                evaluation. 
Default: (0.5, ).\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict: Evaluation results.\n        \"\"\"\n        # evaluate 3D detection performance\n        if isinstance(results[0], dict):\n            return super().evaluate(results, metric, iou_thr, logger, show,\n                                    out_dir, pipeline)\n        # evaluate 2D detection performance\n        else:\n            eval_results = OrderedDict()\n            annotations = [self.get_ann_info(i) for i in range(len(self))]\n            # wrap a single float threshold into a tuple so it is iterable\n            iou_thr_2d = (iou_thr_2d, ) if isinstance(\n                iou_thr_2d, float) else iou_thr_2d\n            for iou_thr_2d_single in iou_thr_2d:\n                mean_ap, _ = eval_map(\n                    results,\n                    annotations,\n                    scale_ranges=None,\n                    iou_thr=iou_thr_2d_single,\n                    dataset=self.CLASSES,\n                    logger=logger)\n                eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap\n            return eval_results\n"
  },
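To make the calibration handling in `SUNRGBDDataset.get_data_info()` concrete, here is a hedged sketch of how the 3x3 `depth2img` matrix it assembles (`calib['K'] @ rt_mat`) can be applied to project a point from depth coordinates to pixel coordinates. The example matrix and point are made up for illustration.

```python
import numpy as np


def project_depth_point(point_xyz, depth2img):
    """Project one 3D point (depth coordinates) to (u, v) pixel coordinates.

    ``depth2img`` is the 3x3 matrix built in SUNRGBDDataset.get_data_info()
    as calib['K'] @ rt_mat.
    """
    uvz = depth2img @ np.asarray(point_xyz, dtype=np.float64)
    return uvz[:2] / uvz[2]  # perspective divide


# Illustrative values only: a toy intrinsic matrix standing in for depth2img.
example_depth2img = np.array([[500., 0., 320.],
                              [0., 500., 240.],
                              [0., 0., 1.]])
print(project_depth_point([1.0, 0.5, 4.0], example_depth2img))
```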
  {
    "path": "mmdet3d/datasets/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\n\n# yapf: disable\nfrom mmdet3d.datasets.pipelines import (Collect3D, DefaultFormatBundle3D,\n                                        LoadAnnotations3D,\n                                        LoadImageFromFileMono3D,\n                                        LoadMultiViewImageFromFiles,\n                                        LoadPointsFromFile,\n                                        LoadPointsFromMultiSweeps,\n                                        MultiScaleFlipAug3D,\n                                        PointSegClassMapping)\nfrom mmdet.datasets.pipelines import LoadImageFromFile, MultiScaleFlipAug\n# yapf: enable\nfrom .builder import PIPELINES\n\n\ndef is_loading_function(transform):\n    \"\"\"Judge whether a transform function is a loading function.\n\n    Note: `MultiScaleFlipAug3D` is a wrapper for multiple pipeline functions,\n    so we need to search if its inner transforms contain any loading function.\n\n    Args:\n        transform (dict | :obj:`Pipeline`): A transform config or a function.\n\n    Returns:\n        bool: Whether it is a loading function. None means can't judge.\n            When transform is `MultiScaleFlipAug3D`, we return None.\n    \"\"\"\n    # TODO: use more elegant way to distinguish loading modules\n    loading_functions = (LoadImageFromFile, LoadPointsFromFile,\n                         LoadAnnotations3D, LoadMultiViewImageFromFiles,\n                         LoadPointsFromMultiSweeps, DefaultFormatBundle3D,\n                         Collect3D, LoadImageFromFileMono3D,\n                         PointSegClassMapping)\n    if isinstance(transform, dict):\n        obj_cls = PIPELINES.get(transform['type'])\n        if obj_cls is None:\n            return False\n        if obj_cls in loading_functions:\n            return True\n        if obj_cls in (MultiScaleFlipAug3D, MultiScaleFlipAug):\n            return None\n    elif callable(transform):\n        if isinstance(transform, loading_functions):\n            return True\n        if isinstance(transform, (MultiScaleFlipAug3D, MultiScaleFlipAug)):\n            return None\n    return False\n\n\ndef get_loading_pipeline(pipeline):\n    \"\"\"Only keep loading image, points and annotations related configuration.\n\n    Args:\n        pipeline (list[dict] | list[:obj:`Pipeline`]):\n            Data pipeline configs or list of pipeline functions.\n\n    Returns:\n        list[dict] | list[:obj:`Pipeline`]): The new pipeline list with only\n            keep loading image, points and annotations related configuration.\n\n    Examples:\n        >>> pipelines = [\n        ...    dict(type='LoadPointsFromFile',\n        ...         coord_type='LIDAR', load_dim=4, use_dim=4),\n        ...    dict(type='LoadImageFromFile'),\n        ...    dict(type='LoadAnnotations3D',\n        ...         with_bbox=True, with_label_3d=True),\n        ...    dict(type='Resize',\n        ...         img_scale=[(640, 192), (2560, 768)], keep_ratio=True),\n        ...    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n        ...    dict(type='PointsRangeFilter',\n        ...         point_cloud_range=point_cloud_range),\n        ...    dict(type='ObjectRangeFilter',\n        ...         point_cloud_range=point_cloud_range),\n        ...    dict(type='PointShuffle'),\n        ...    dict(type='Normalize', **img_norm_cfg),\n        ...    dict(type='Pad', size_divisor=32),\n        ...    
dict(type='DefaultFormatBundle3D', class_names=class_names),\n        ...    dict(type='Collect3D',\n        ...         keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])\n        ...    ]\n        >>> expected_pipelines = [\n        ...    dict(type='LoadPointsFromFile',\n        ...         coord_type='LIDAR', load_dim=4, use_dim=4),\n        ...    dict(type='LoadImageFromFile'),\n        ...    dict(type='LoadAnnotations3D',\n        ...         with_bbox=True, with_label_3d=True),\n        ...    dict(type='DefaultFormatBundle3D', class_names=class_names),\n        ...    dict(type='Collect3D',\n        ...         keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])\n        ...    ]\n        >>> assert expected_pipelines == \\\n        ...        get_loading_pipeline(pipelines)\n    \"\"\"\n    loading_pipeline = []\n    for transform in pipeline:\n        is_loading = is_loading_function(transform)\n        if is_loading is None:  # MultiScaleFlipAug3D\n            # extract its inner pipeline\n            if isinstance(transform, dict):\n                inner_pipeline = transform.get('transforms', [])\n            else:\n                inner_pipeline = transform.transforms.transforms\n            loading_pipeline.extend(get_loading_pipeline(inner_pipeline))\n        elif is_loading:\n            loading_pipeline.append(transform)\n    assert len(loading_pipeline) > 0, \\\n        'The data pipeline in your config file must include ' \\\n        'loading step.'\n    return loading_pipeline\n\n\ndef extract_result_dict(results, key):\n    \"\"\"Extract and return the data corresponding to key in result dict.\n\n    ``results`` is a dict output from `pipeline(input_dict)`, which is the\n        loaded data from ``Dataset`` class.\n    The data terms inside may be wrapped in list, tuple and DataContainer, so\n        this function essentially extracts data from these wrappers.\n\n    Args:\n        results (dict): Data loaded using pipeline.\n        key (str): Key of the desired data.\n\n    Returns:\n        np.ndarray | torch.Tensor: Data term.\n    \"\"\"\n    if key not in results.keys():\n        return None\n    # results[key] may be data or list[data] or tuple[data]\n    # data may be wrapped inside DataContainer\n    data = results[key]\n    if isinstance(data, (list, tuple)):\n        data = data[0]\n    if isinstance(data, mmcv.parallel.DataContainer):\n        data = data._data\n    return data\n    \nimport numpy as np\nfrom pyquaternion import Quaternion\n\ndef nuscenes_get_rt_matrix(\n    src_sample,\n    dest_sample,\n    src_mod,\n    dest_mod):\n    \n    \"\"\"\n    CAM_FRONT_XYD indicates going from 2d image coords + depth\n        Note that image coords need to multiplied with said depths first to bring it into 2d hom coords.\n    CAM_FRONT indicates going from camera coordinates xyz\n    \n    Method is: whatever the input is, transform to global first.\n    \"\"\"\n    possible_mods = ['CAM_FRONT_XYD', \n                     'CAM_FRONT_RIGHT_XYD', \n                     'CAM_FRONT_LEFT_XYD', \n                     'CAM_BACK_XYD', \n                     'CAM_BACK_LEFT_XYD', \n                     'CAM_BACK_RIGHT_XYD',\n                     'CAM_FRONT', \n                     'CAM_FRONT_RIGHT', \n                     'CAM_FRONT_LEFT', \n                     'CAM_BACK', \n                     'CAM_BACK_LEFT', \n                     'CAM_BACK_RIGHT',\n                     'lidar',\n                     'ego',\n                     'global']\n\n    assert 
src_mod in possible_mods and dest_mod in possible_mods\n    \n    src_lidar_to_ego = np.eye(4, 4)\n    src_lidar_to_ego[:3, :3] = Quaternion(src_sample['lidar2ego_rotation']).rotation_matrix\n    src_lidar_to_ego[:3, 3] = np.array(src_sample['lidar2ego_translation'])\n    \n    src_ego_to_global = np.eye(4, 4)\n    src_ego_to_global[:3, :3] = Quaternion(src_sample['ego2global_rotation']).rotation_matrix\n    src_ego_to_global[:3, 3] = np.array(src_sample['ego2global_translation'])\n    \n    dest_lidar_to_ego = np.eye(4, 4)\n    dest_lidar_to_ego[:3, :3] = Quaternion(dest_sample['lidar2ego_rotation']).rotation_matrix\n    dest_lidar_to_ego[:3, 3] = np.array(dest_sample['lidar2ego_translation'])\n    \n    dest_ego_to_global = np.eye(4, 4)\n    dest_ego_to_global[:3, :3] = Quaternion(dest_sample['ego2global_rotation']).rotation_matrix\n    dest_ego_to_global[:3, 3] = np.array(dest_sample['ego2global_translation'])\n    \n    src_mod_to_global = None\n    dest_global_to_mod = None\n    \n    if src_mod == \"global\":\n        src_mod_to_global = np.eye(4, 4)\n    elif src_mod == \"ego\":\n        src_mod_to_global = src_ego_to_global\n    elif src_mod == \"lidar\":\n        src_mod_to_global = src_ego_to_global @ src_lidar_to_ego\n    elif \"CAM\" in src_mod:\n        src_sample_cam = src_sample['cams'][src_mod.replace(\"_XYD\", \"\")]\n        \n        src_cam_to_lidar = np.eye(4, 4)\n        src_cam_to_lidar[:3, :3] = src_sample_cam['sensor2lidar_rotation']\n        src_cam_to_lidar[:3, 3] = src_sample_cam['sensor2lidar_translation']\n        \n        src_cam_intrinsics = np.eye(4, 4)\n        src_cam_intrinsics[:3, :3] = src_sample_cam['cam_intrinsic']\n        \n        if \"XYD\" not in src_mod:\n            src_mod_to_global = (src_ego_to_global @ src_lidar_to_ego @ \n                                 src_cam_to_lidar)\n        else:\n            src_mod_to_global = (src_ego_to_global @ src_lidar_to_ego @ \n                                 src_cam_to_lidar @ np.linalg.inv(src_cam_intrinsics))\n            \n            \n    \n    if dest_mod == \"global\":\n        dest_global_to_mod = np.eye(4, 4)\n    elif dest_mod == \"ego\":\n        dest_global_to_mod = np.linalg.inv(dest_ego_to_global)\n    elif dest_mod == \"lidar\":\n        dest_global_to_mod = np.linalg.inv(dest_ego_to_global @ dest_lidar_to_ego)\n    elif \"CAM\" in dest_mod:\n        dest_sample_cam = dest_sample['cams'][dest_mod.replace(\"_XYD\", \"\")]\n        \n        dest_cam_to_lidar = np.eye(4, 4)\n        dest_cam_to_lidar[:3, :3] = dest_sample_cam['sensor2lidar_rotation']\n        dest_cam_to_lidar[:3, 3] = dest_sample_cam['sensor2lidar_translation']\n        \n        dest_cam_intrinsics = np.eye(4, 4)\n        dest_cam_intrinsics[:3, :3] = dest_sample_cam['cam_intrinsic']\n        \n        if \"XYD\" not in dest_mod:\n            dest_global_to_mod = np.linalg.inv(dest_ego_to_global @ dest_lidar_to_ego @ \n                                               dest_cam_to_lidar)\n        else:\n            dest_global_to_mod = np.linalg.inv(dest_ego_to_global @ dest_lidar_to_ego @ \n                                               dest_cam_to_lidar @ np.linalg.inv(dest_cam_intrinsics))\n    \n    return dest_global_to_mod @ src_mod_to_global"
  },
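Since `nuscenes_get_rt_matrix()` appended to `utils.py` above only returns a 4x4 homogeneous transform, a short hedged sketch of applying it may help. The helper below is hypothetical; `cur_info` and `prev_info` stand for nuScenes-style info dicts carrying the `lidar2ego_*` / `ego2global_*` fields that the function reads.

```python
import numpy as np

from mmdet3d.datasets.utils import nuscenes_get_rt_matrix


def lidar_points_to_prev_ego(points_lidar, cur_info, prev_info):
    """Map an (N, 3) array from the current frame's lidar coordinates into a
    previous frame's ego coordinates (hypothetical helper for illustration)."""
    rt = nuscenes_get_rt_matrix(cur_info, prev_info, 'lidar', 'ego')  # 4x4
    homo = np.hstack([points_lidar, np.ones((points_lidar.shape[0], 1))])
    return (rt @ homo.T).T[:, :3]
```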
  {
    "path": "mmdet3d/datasets/vector_map.py",
    "content": "import os\nimport json\nimport copy\nimport tempfile\nfrom typing import Dict, List\n\nimport numpy as np\nimport pyquaternion\nimport mmcv\nfrom os import path as osp\nfrom mmdet.datasets import DATASETS\nimport torch\nimport numpy as np\nfrom nuscenes.eval.common.utils import quaternion_yaw, Quaternion\n# from .vad_custom_nuscenes_eval import NuScenesEval_custom\nfrom nuscenes.eval.common.utils import center_distance\n# from projects.mmdet3d_plugin.models.utils.visual import save_tensor\nfrom mmcv.parallel import DataContainer as DC\nimport random\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\n# from projects.mmdet3d_plugin.core.bbox.structures.nuscenes_box import CustomNuscenesBox\nfrom shapely import affinity, ops\nfrom shapely.geometry import LineString, box, MultiPolygon, MultiLineString\nfrom mmdet.datasets.pipelines import to_tensor\nfrom nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer\nfrom nuscenes.eval.detection.constants import DETECTION_NAMES\n\n\nclass LiDARInstanceLines(object):\n    \"\"\"Line instance in LIDAR coordinates\n\n    \"\"\"\n    def __init__(self, \n                 instance_line_list, \n                 sample_dist=1,\n                 num_samples=250,\n                 padding=False,\n                 fixed_num=-1,\n                 padding_value=-10000,\n                 patch_size=None):\n        assert isinstance(instance_line_list, list)\n        assert patch_size is not None\n        if len(instance_line_list) != 0:\n            assert isinstance(instance_line_list[0], LineString)\n        self.patch_size = patch_size\n        self.max_x = self.patch_size[1] / 2\n        self.max_y = self.patch_size[0] / 2\n        self.sample_dist = sample_dist\n        self.num_samples = num_samples\n        self.padding = padding\n        self.fixed_num = fixed_num\n        self.padding_value = padding_value\n\n        self.instance_list = instance_line_list\n\n    @property\n    def start_end_points(self):\n        \"\"\"\n        return torch.Tensor([N,4]), in xstart, ystart, xend, yend form\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instance_se_points_list = []\n        for instance in self.instance_list:\n            se_points = []\n            se_points.extend(instance.coords[0])\n            se_points.extend(instance.coords[-1])\n            instance_se_points_list.append(se_points)\n        instance_se_points_array = np.array(instance_se_points_list)\n        instance_se_points_tensor = to_tensor(instance_se_points_array)\n        instance_se_points_tensor = instance_se_points_tensor.to(\n                                dtype=torch.float32)\n        instance_se_points_tensor[:,0] = torch.clamp(instance_se_points_tensor[:,0], min=-self.max_x,max=self.max_x)\n        instance_se_points_tensor[:,1] = torch.clamp(instance_se_points_tensor[:,1], min=-self.max_y,max=self.max_y)\n        instance_se_points_tensor[:,2] = torch.clamp(instance_se_points_tensor[:,2], min=-self.max_x,max=self.max_x)\n        instance_se_points_tensor[:,3] = torch.clamp(instance_se_points_tensor[:,3], min=-self.max_y,max=self.max_y)\n        return instance_se_points_tensor\n\n    @property\n    def bbox(self):\n        \"\"\"\n        return torch.Tensor([N,4]), in xmin, ymin, xmax, ymax form\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instance_bbox_list = []\n        for instance in self.instance_list:\n            # bounds is bbox: [xmin, ymin, xmax, ymax]\n            
instance_bbox_list.append(instance.bounds)\n        instance_bbox_array = np.array(instance_bbox_list)\n        instance_bbox_tensor = to_tensor(instance_bbox_array)\n        instance_bbox_tensor = instance_bbox_tensor.to(\n                            dtype=torch.float32)\n        instance_bbox_tensor[:,0] = torch.clamp(instance_bbox_tensor[:,0], min=-self.max_x,max=self.max_x)\n        instance_bbox_tensor[:,1] = torch.clamp(instance_bbox_tensor[:,1], min=-self.max_y,max=self.max_y)\n        instance_bbox_tensor[:,2] = torch.clamp(instance_bbox_tensor[:,2], min=-self.max_x,max=self.max_x)\n        instance_bbox_tensor[:,3] = torch.clamp(instance_bbox_tensor[:,3], min=-self.max_y,max=self.max_y)\n        return instance_bbox_tensor\n\n    @property\n    def fixed_num_sampled_points(self):\n        \"\"\"\n        return torch.Tensor([N,fixed_num,2]), in xmin, ymin, xmax, ymax form\n            N means the num of instances\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instance_points_list = []\n        for instance in self.instance_list:\n            distances = np.linspace(0, instance.length, self.fixed_num)\n            sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n            instance_points_list.append(sampled_points)\n        instance_points_array = np.array(instance_points_list)\n        instance_points_tensor = to_tensor(instance_points_array)\n        instance_points_tensor = instance_points_tensor.to(\n                            dtype=torch.float32)\n        instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)\n        instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)\n        return instance_points_tensor\n\n    @property\n    def fixed_num_sampled_points_ambiguity(self):\n        \"\"\"\n        return torch.Tensor([N,fixed_num,2]), in xmin, ymin, xmax, ymax form\n            N means the num of instances\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instance_points_list = []\n        for instance in self.instance_list:\n            distances = np.linspace(0, instance.length, self.fixed_num)\n            sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n            instance_points_list.append(sampled_points)\n        instance_points_array = np.array(instance_points_list)\n        instance_points_tensor = to_tensor(instance_points_array)\n        instance_points_tensor = instance_points_tensor.to(\n                            dtype=torch.float32)\n        instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)\n        instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)\n        instance_points_tensor = instance_points_tensor.unsqueeze(1)\n        return instance_points_tensor\n\n    @property\n    def fixed_num_sampled_points_torch(self):\n        \"\"\"\n        return torch.Tensor([N,fixed_num,2]), in xmin, ymin, xmax, ymax form\n            N means the num of instances\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instance_points_list = []\n        for instance in self.instance_list:\n            # distances = np.linspace(0, instance.length, self.fixed_num)\n            # sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in 
distances]).reshape(-1, 2)\n            poly_pts = to_tensor(np.array(list(instance.coords)))\n            poly_pts = poly_pts.unsqueeze(0).permute(0,2,1)\n            sampled_pts = torch.nn.functional.interpolate(poly_pts,size=(self.fixed_num),mode='linear',align_corners=True)\n            sampled_pts = sampled_pts.permute(0,2,1).squeeze(0)\n            instance_points_list.append(sampled_pts)\n        # instance_points_array = np.array(instance_points_list)\n        # instance_points_tensor = to_tensor(instance_points_array)\n        instance_points_tensor = torch.stack(instance_points_list,dim=0)\n        instance_points_tensor = instance_points_tensor.to(\n                            dtype=torch.float32)\n        instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)\n        instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)\n        return instance_points_tensor\n\n    @property\n    def shift_fixed_num_sampled_points(self):\n        \"\"\"\n        return  [instances_num, num_shifts, fixed_num, 2]\n        \"\"\"\n        fixed_num_sampled_points = self.fixed_num_sampled_points\n        instances_list = []\n        is_poly = False\n        # is_line = False\n        # import pdb;pdb.set_trace()\n        for fixed_num_pts in fixed_num_sampled_points:\n            # [fixed_num, 2]\n            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])\n            fixed_num = fixed_num_pts.shape[0]\n            shift_pts_list = []\n            if is_poly:\n                # import pdb;pdb.set_trace()\n                for shift_right_i in range(fixed_num):\n                    shift_pts_list.append(fixed_num_pts.roll(shift_right_i,0))\n            else:\n                shift_pts_list.append(fixed_num_pts)\n                shift_pts_list.append(fixed_num_pts.flip(0))\n            shift_pts = torch.stack(shift_pts_list,dim=0)\n\n            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)\n            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)\n\n            if not is_poly:\n                padding = torch.full([fixed_num-shift_pts.shape[0],fixed_num,2], self.padding_value)\n                shift_pts = torch.cat([shift_pts,padding],dim=0)\n                # padding = np.zeros((self.num_samples - len(sampled_points), 2))\n                # sampled_points = np.concatenate([sampled_points, padding], axis=0)\n            instances_list.append(shift_pts)\n        instances_tensor = torch.stack(instances_list, dim=0)\n        instances_tensor = instances_tensor.to(\n                            dtype=torch.float32)\n        return instances_tensor\n\n    @property\n    def shift_fixed_num_sampled_points_v1(self):\n        \"\"\"\n        return  [instances_num, num_shifts, fixed_num, 2]\n        \"\"\"\n        fixed_num_sampled_points = self.fixed_num_sampled_points\n        instances_list = []\n        is_poly = False\n        # is_line = False\n        # import pdb;pdb.set_trace()\n        for fixed_num_pts in fixed_num_sampled_points:\n            # [fixed_num, 2]\n            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])\n            pts_num = fixed_num_pts.shape[0]\n            shift_num = pts_num - 1\n            if is_poly:\n                pts_to_shift = fixed_num_pts[:-1,:]\n            shift_pts_list = []\n            if is_poly:\n                for shift_right_i in range(shift_num):\n                    
shift_pts_list.append(pts_to_shift.roll(shift_right_i,0))\n            else:\n                shift_pts_list.append(fixed_num_pts)\n                shift_pts_list.append(fixed_num_pts.flip(0))\n            shift_pts = torch.stack(shift_pts_list,dim=0)\n\n            if is_poly:\n                _, _, num_coords = shift_pts.shape\n                tmp_shift_pts = shift_pts.new_zeros((shift_num, pts_num, num_coords))\n                tmp_shift_pts[:,:-1,:] = shift_pts\n                tmp_shift_pts[:,-1,:] = shift_pts[:,0,:]\n                shift_pts = tmp_shift_pts\n\n            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)\n            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)\n\n            if not is_poly:\n                padding = torch.full([shift_num-shift_pts.shape[0],pts_num,2], self.padding_value)\n                shift_pts = torch.cat([shift_pts,padding],dim=0)\n                # padding = np.zeros((self.num_samples - len(sampled_points), 2))\n                # sampled_points = np.concatenate([sampled_points, padding], axis=0)\n            instances_list.append(shift_pts)\n        instances_tensor = torch.stack(instances_list, dim=0)\n        instances_tensor = instances_tensor.to(\n                            dtype=torch.float32)\n        return instances_tensor\n\n    @property\n    def shift_fixed_num_sampled_points_v2(self):\n        \"\"\"\n        return  [instances_num, num_shifts, fixed_num, 2]\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instances_list = []\n        for instance in self.instance_list:\n            distances = np.linspace(0, instance.length, self.fixed_num)\n            poly_pts = np.array(list(instance.coords))\n            start_pts = poly_pts[0]\n            end_pts = poly_pts[-1]\n            is_poly = np.equal(start_pts, end_pts)\n            is_poly = is_poly.all()\n            shift_pts_list = []\n            pts_num, coords_num = poly_pts.shape\n            shift_num = pts_num - 1\n            final_shift_num = self.fixed_num - 1\n            if is_poly:\n                pts_to_shift = poly_pts[:-1,:]\n                for shift_right_i in range(shift_num):\n                    shift_pts = np.roll(pts_to_shift,shift_right_i,axis=0)\n                    pts_to_concat = shift_pts[0]\n                    pts_to_concat = np.expand_dims(pts_to_concat,axis=0)\n                    shift_pts = np.concatenate((shift_pts,pts_to_concat),axis=0)\n                    shift_instance = LineString(shift_pts)\n                    shift_sampled_points = np.array([list(shift_instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n                    shift_pts_list.append(shift_sampled_points)\n                # import pdb;pdb.set_trace()\n            else:\n                sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n                flip_sampled_points = np.flip(sampled_points, axis=0)\n                shift_pts_list.append(sampled_points)\n                shift_pts_list.append(flip_sampled_points)\n            \n            multi_shifts_pts = np.stack(shift_pts_list,axis=0)\n            shifts_num,_,_ = multi_shifts_pts.shape\n\n            if shifts_num > final_shift_num:\n                index = np.random.choice(multi_shifts_pts.shape[0], final_shift_num, replace=False)\n                multi_shifts_pts = multi_shifts_pts[index]\n            \n            
multi_shifts_pts_tensor = to_tensor(multi_shifts_pts)\n            multi_shifts_pts_tensor = multi_shifts_pts_tensor.to(\n                            dtype=torch.float32)\n            \n            multi_shifts_pts_tensor[:,:,0] = torch.clamp(multi_shifts_pts_tensor[:,:,0], min=-self.max_x,max=self.max_x)\n            multi_shifts_pts_tensor[:,:,1] = torch.clamp(multi_shifts_pts_tensor[:,:,1], min=-self.max_y,max=self.max_y)\n            # if not is_poly:\n            if multi_shifts_pts_tensor.shape[0] < final_shift_num:\n                padding = torch.full([final_shift_num-multi_shifts_pts_tensor.shape[0],self.fixed_num,2], self.padding_value)\n                multi_shifts_pts_tensor = torch.cat([multi_shifts_pts_tensor,padding],dim=0)\n            instances_list.append(multi_shifts_pts_tensor)\n        instances_tensor = torch.stack(instances_list, dim=0)\n        instances_tensor = instances_tensor.to(\n                            dtype=torch.float32)\n        return instances_tensor\n\n    @property\n    def shift_fixed_num_sampled_points_v3(self):\n        \"\"\"\n        return  [instances_num, num_shifts, fixed_num, 2]\n        \"\"\"\n        assert len(self.instance_list) != 0\n        instances_list = []\n        for instance in self.instance_list:\n            distances = np.linspace(0, instance.length, self.fixed_num)\n            poly_pts = np.array(list(instance.coords))\n            start_pts = poly_pts[0]\n            end_pts = poly_pts[-1]\n            is_poly = np.equal(start_pts, end_pts)\n            is_poly = is_poly.all()\n            shift_pts_list = []\n            pts_num, coords_num = poly_pts.shape\n            shift_num = pts_num - 1\n            final_shift_num = self.fixed_num - 1\n            if is_poly:\n                pts_to_shift = poly_pts[:-1,:]\n                for shift_right_i in range(shift_num):\n                    shift_pts = np.roll(pts_to_shift,shift_right_i,axis=0)\n                    pts_to_concat = shift_pts[0]\n                    pts_to_concat = np.expand_dims(pts_to_concat,axis=0)\n                    shift_pts = np.concatenate((shift_pts,pts_to_concat),axis=0)\n                    shift_instance = LineString(shift_pts)\n                    shift_sampled_points = np.array([list(shift_instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n                    shift_pts_list.append(shift_sampled_points)\n                flip_pts_to_shift = np.flip(pts_to_shift, axis=0)\n                for shift_right_i in range(shift_num):\n                    shift_pts = np.roll(flip_pts_to_shift,shift_right_i,axis=0)\n                    pts_to_concat = shift_pts[0]\n                    pts_to_concat = np.expand_dims(pts_to_concat,axis=0)\n                    shift_pts = np.concatenate((shift_pts,pts_to_concat),axis=0)\n                    shift_instance = LineString(shift_pts)\n                    shift_sampled_points = np.array([list(shift_instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n                    shift_pts_list.append(shift_sampled_points)\n                # import pdb;pdb.set_trace()\n            else:\n                sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n                flip_sampled_points = np.flip(sampled_points, axis=0)\n                shift_pts_list.append(sampled_points)\n                shift_pts_list.append(flip_sampled_points)\n            \n            multi_shifts_pts = 
np.stack(shift_pts_list,axis=0)\n            shifts_num,_,_ = multi_shifts_pts.shape\n            # import pdb;pdb.set_trace()\n            if shifts_num > 2*final_shift_num:\n                index = np.random.choice(shift_num, final_shift_num, replace=False)\n                flip0_shifts_pts = multi_shifts_pts[index]\n                flip1_shifts_pts = multi_shifts_pts[index+shift_num]\n                multi_shifts_pts = np.concatenate((flip0_shifts_pts,flip1_shifts_pts),axis=0)\n            \n            multi_shifts_pts_tensor = to_tensor(multi_shifts_pts)\n            multi_shifts_pts_tensor = multi_shifts_pts_tensor.to(\n                            dtype=torch.float32)\n            \n            multi_shifts_pts_tensor[:,:,0] = torch.clamp(multi_shifts_pts_tensor[:,:,0], min=-self.max_x,max=self.max_x)\n            multi_shifts_pts_tensor[:,:,1] = torch.clamp(multi_shifts_pts_tensor[:,:,1], min=-self.max_y,max=self.max_y)\n            # if not is_poly:\n            if multi_shifts_pts_tensor.shape[0] < 2*final_shift_num:\n                padding = torch.full([final_shift_num*2-multi_shifts_pts_tensor.shape[0],self.fixed_num,2], self.padding_value)\n                multi_shifts_pts_tensor = torch.cat([multi_shifts_pts_tensor,padding],dim=0)\n            instances_list.append(multi_shifts_pts_tensor)\n        instances_tensor = torch.stack(instances_list, dim=0)\n        instances_tensor = instances_tensor.to(\n                            dtype=torch.float32)\n        return instances_tensor\n\n    @property\n    def shift_fixed_num_sampled_points_v4(self):\n        \"\"\"\n        return  [instances_num, num_shifts, fixed_num, 2]\n        \"\"\"\n        fixed_num_sampled_points = self.fixed_num_sampled_points\n        instances_list = []\n        is_poly = False\n        # is_line = False\n        # import pdb;pdb.set_trace()\n        for fixed_num_pts in fixed_num_sampled_points:\n            # [fixed_num, 2]\n            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])\n            pts_num = fixed_num_pts.shape[0]\n            shift_num = pts_num - 1\n            shift_pts_list = []\n            if is_poly:\n                pts_to_shift = fixed_num_pts[:-1,:]\n                for shift_right_i in range(shift_num):\n                    shift_pts_list.append(pts_to_shift.roll(shift_right_i,0))\n                flip_pts_to_shift = pts_to_shift.flip(0)\n                for shift_right_i in range(shift_num):\n                    shift_pts_list.append(flip_pts_to_shift.roll(shift_right_i,0))\n            else:\n                shift_pts_list.append(fixed_num_pts)\n                shift_pts_list.append(fixed_num_pts.flip(0))\n            shift_pts = torch.stack(shift_pts_list,dim=0)\n\n            if is_poly:\n                _, _, num_coords = shift_pts.shape\n                tmp_shift_pts = shift_pts.new_zeros((shift_num*2, pts_num, num_coords))\n                tmp_shift_pts[:,:-1,:] = shift_pts\n                tmp_shift_pts[:,-1,:] = shift_pts[:,0,:]\n                shift_pts = tmp_shift_pts\n\n            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)\n            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)\n\n            if not is_poly:\n                padding = torch.full([shift_num*2-shift_pts.shape[0],pts_num,2], self.padding_value)\n                shift_pts = torch.cat([shift_pts,padding],dim=0)\n                # padding = np.zeros((self.num_samples - len(sampled_points), 2))\n                # 
sampled_points = np.concatenate([sampled_points, padding], axis=0)\n            instances_list.append(shift_pts)\n        instances_tensor = torch.stack(instances_list, dim=0)\n        instances_tensor = instances_tensor.to(\n                            dtype=torch.float32)\n        return instances_tensor\n\n    @property\n    def shift_fixed_num_sampled_points_torch(self):\n        \"\"\"\n        return  [instances_num, num_shifts, fixed_num, 2]\n        \"\"\"\n        fixed_num_sampled_points = self.fixed_num_sampled_points_torch\n        instances_list = []\n        is_poly = False\n        # is_line = False\n        # import pdb;pdb.set_trace()\n        for fixed_num_pts in fixed_num_sampled_points:\n            # [fixed_num, 2]\n            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])\n            fixed_num = fixed_num_pts.shape[0]\n            shift_pts_list = []\n            if is_poly:\n                # import pdb;pdb.set_trace()\n                for shift_right_i in range(fixed_num):\n                    shift_pts_list.append(fixed_num_pts.roll(shift_right_i,0))\n            else:\n                shift_pts_list.append(fixed_num_pts)\n                shift_pts_list.append(fixed_num_pts.flip(0))\n            shift_pts = torch.stack(shift_pts_list,dim=0)\n\n            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)\n            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)\n\n            if not is_poly:\n                padding = torch.full([fixed_num-shift_pts.shape[0],fixed_num,2], self.padding_value)\n                shift_pts = torch.cat([shift_pts,padding],dim=0)\n                # padding = np.zeros((self.num_samples - len(sampled_points), 2))\n                # sampled_points = np.concatenate([sampled_points, padding], axis=0)\n            instances_list.append(shift_pts)\n        instances_tensor = torch.stack(instances_list, dim=0)\n        instances_tensor = instances_tensor.to(\n                            dtype=torch.float32)\n        return instances_tensor\n\n    # @property\n    # def polyline_points(self):\n    #     \"\"\"\n    #     return [[x0,y0],[x1,y1],...]\n    #     \"\"\"\n    #     assert len(self.instance_list) != 0\n    #     for instance in self.instance_list:\n\n\nclass VectorizedLocalMap(object):\n    CLASS2LABEL = {\n        'road_divider': 0,\n        'lane_divider': 0,\n        'ped_crossing': 1,\n        'contours': 2,\n        'others': -1\n    }\n    def __init__(self,\n                 dataroot,\n                 patch_size,\n                 map_classes=['divider','ped_crossing','boundary'],\n                 line_classes=['road_divider', 'lane_divider'],\n                 ped_crossing_classes=['ped_crossing'],\n                 contour_classes=['road_segment', 'lane'],\n                 sample_dist=1,\n                 num_samples=250,\n                 padding=False,\n                 fixed_ptsnum_per_line=-1,\n                 padding_value=-10000,):\n        '''\n        Args:\n            fixed_ptsnum_per_line = -1 : no fixed num\n        '''\n        super().__init__()\n        self.data_root = dataroot\n        self.MAPS = ['boston-seaport', 'singapore-hollandvillage',\n                     'singapore-onenorth', 'singapore-queenstown']\n        self.vec_classes = map_classes\n        self.line_classes = line_classes\n        self.ped_crossing_classes = ped_crossing_classes\n        self.polygon_classes = contour_classes\n        self.nusc_maps = {}\n        
self.map_explorer = {}\n        for loc in self.MAPS:\n            self.nusc_maps[loc] = NuScenesMap(dataroot=self.data_root, map_name=loc)\n            self.map_explorer[loc] = NuScenesMapExplorer(self.nusc_maps[loc])\n\n        self.patch_size = patch_size\n        self.sample_dist = sample_dist\n        self.num_samples = num_samples\n        self.padding = padding\n        self.fixed_num = fixed_ptsnum_per_line\n        self.padding_value = padding_value\n\n    def gen_vectorized_samples(self, location, lidar2global_translation, patch_angle, flip_dx, flip_dy):\n        '''\n        use lidar2global to get gt map layers\n        '''\n        \n        map_pose = lidar2global_translation[:2]\n        # rotation = Quaternion(lidar2global_rotation)\n\n        patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])\n        # patch_angle = quaternion_yaw(rotation) / np.pi * 180\n        # import pdb;pdb.set_trace()\n        vectors = []\n        for vec_class in self.vec_classes:\n            if vec_class == 'divider':\n                line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes, location, flip_dx, flip_dy)\n                line_instances_dict = self.line_geoms_to_instances(line_geom)     \n                for line_type, instances in line_instances_dict.items():\n                    for instance in instances:\n                        vectors.append((instance, self.CLASS2LABEL.get(line_type, -1)))\n            elif vec_class == 'ped_crossing':\n                ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes, location, flip_dx, flip_dy)\n                # ped_vector_list = self.ped_geoms_to_vectors(ped_geom)\n                ped_instance_list = self.ped_poly_geoms_to_instances(ped_geom)\n                # import pdb;pdb.set_trace()\n                for instance in ped_instance_list:\n                    vectors.append((instance, self.CLASS2LABEL.get('ped_crossing', -1)))\n            elif vec_class == 'boundary':\n                polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes, location, flip_dx, flip_dy)\n                # import pdb;pdb.set_trace()\n                poly_bound_list = self.poly_geoms_to_instances(polygon_geom)\n                # import pdb;pdb.set_trace()\n                for contour in poly_bound_list:\n                    vectors.append((contour, self.CLASS2LABEL.get('contours', -1)))\n            else:\n                raise ValueError(f'WRONG vec_class: {vec_class}')\n\n        # filter out -1\n        filtered_vectors = []\n        gt_pts_loc_3d = []\n        gt_pts_num_3d = []\n        gt_labels = []\n        gt_instance = []\n        for instance, type in vectors:\n            if type != -1:\n                gt_instance.append(instance)\n                gt_labels.append(type)\n        \n        gt_instance = LiDARInstanceLines(gt_instance,self.sample_dist,\n                        self.num_samples, self.padding, self.fixed_num,self.padding_value, patch_size=self.patch_size)\n\n        anns_results = dict(\n            gt_vecs_pts_loc=gt_instance,\n            gt_vecs_label=gt_labels,\n\n        )\n        # import pdb;pdb.set_trace()\n        return anns_results\n\n    def get_map_geom(self, patch_box, patch_angle, layer_names, location, flip_dx, flip_dy):\n        map_geom = []\n        for layer_name in layer_names:\n            if layer_name in self.line_classes:\n                # import pdb;pdb.set_trace()\n                geoms = 
self.get_divider_line(patch_box, patch_angle, layer_name, location, flip_dx, flip_dy)\n                # import pdb;pdb.set_trace()\n                # geoms = self.map_explorer[location]._get_layer_line(patch_box, patch_angle, layer_name)\n                map_geom.append((layer_name, geoms))\n            elif layer_name in self.polygon_classes:\n                geoms = self.get_contour_line(patch_box, patch_angle, layer_name, location, flip_dx, flip_dy)\n                # geoms = self.map_explorer[location]._get_layer_polygon(patch_box, patch_angle, layer_name)\n                map_geom.append((layer_name, geoms))\n            elif layer_name in self.ped_crossing_classes:\n                geoms = self.get_ped_crossing_line(patch_box, patch_angle, location, flip_dx, flip_dy)\n                # geoms = self.map_explorer[location]._get_layer_polygon(patch_box, patch_angle, layer_name)\n                map_geom.append((layer_name, geoms))\n        return map_geom\n\n    def _one_type_line_geom_to_vectors(self, line_geom):\n        line_vectors = []\n        \n        for line in line_geom:\n            if not line.is_empty:\n                if line.geom_type == 'MultiLineString':\n                    for single_line in line.geoms:\n                        line_vectors.append(self.sample_pts_from_line(single_line))\n                elif line.geom_type == 'LineString':\n                    line_vectors.append(self.sample_pts_from_line(line))\n                else:\n                    raise NotImplementedError\n        return line_vectors\n\n    def _one_type_line_geom_to_instances(self, line_geom):\n        line_instances = []\n        \n        for line in line_geom:\n            if not line.is_empty:\n                if line.geom_type == 'MultiLineString':\n                    for single_line in line.geoms:\n                        line_instances.append(single_line)\n                elif line.geom_type == 'LineString':\n                    line_instances.append(line)\n                else:\n                    raise NotImplementedError\n        return line_instances\n\n    def poly_geoms_to_vectors(self, polygon_geom):\n        roads = polygon_geom[0][1]\n        lanes = polygon_geom[1][1]\n        union_roads = ops.unary_union(roads)\n        union_lanes = ops.unary_union(lanes)\n        union_segments = ops.unary_union([union_roads, union_lanes])\n        max_x = self.patch_size[1] / 2\n        max_y = self.patch_size[0] / 2\n        local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n        exteriors = []\n        interiors = []\n        if union_segments.geom_type != 'MultiPolygon':\n            union_segments = MultiPolygon([union_segments])\n        for poly in union_segments.geoms:\n            exteriors.append(poly.exterior)\n            for inter in poly.interiors:\n                interiors.append(inter)\n\n        results = []\n        for ext in exteriors:\n            if ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        for inter in interiors:\n            if not inter.is_ccw:\n                inter.coords = list(inter.coords)[::-1]\n            lines = inter.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        return 
self._one_type_line_geom_to_vectors(results)\n\n    def ped_poly_geoms_to_instances(self, ped_geom):\n        # import pdb;pdb.set_trace()\n        ped = ped_geom[0][1]\n        union_segments = ops.unary_union(ped)\n        max_x = self.patch_size[1] / 2\n        max_y = self.patch_size[0] / 2\n        # local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n        local_patch = box(-max_x - 0.2, -max_y - 0.2, max_x + 0.2, max_y + 0.2)\n        exteriors = []\n        interiors = []\n        if union_segments.geom_type != 'MultiPolygon':\n            union_segments = MultiPolygon([union_segments])\n        for poly in union_segments.geoms:\n            exteriors.append(poly.exterior)\n            for inter in poly.interiors:\n                interiors.append(inter)\n\n        results = []\n        for ext in exteriors:\n            if ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        for inter in interiors:\n            if not inter.is_ccw:\n                inter.coords = list(inter.coords)[::-1]\n            lines = inter.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        return self._one_type_line_geom_to_instances(results)\n\n\n    def poly_geoms_to_instances(self, polygon_geom):\n        roads = polygon_geom[0][1]\n        lanes = polygon_geom[1][1]\n        union_roads = ops.unary_union(roads)\n        union_lanes = ops.unary_union(lanes)\n        union_segments = ops.unary_union([union_roads, union_lanes])\n        max_x = self.patch_size[1] / 2\n        max_y = self.patch_size[0] / 2\n        local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n        exteriors = []\n        interiors = []\n        if union_segments.geom_type != 'MultiPolygon':\n            union_segments = MultiPolygon([union_segments])\n        for poly in union_segments.geoms:\n            exteriors.append(poly.exterior)\n            for inter in poly.interiors:\n                interiors.append(inter)\n\n        results = []\n        for ext in exteriors:\n            if ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        for inter in interiors:\n            if not inter.is_ccw:\n                inter.coords = list(inter.coords)[::-1]\n            lines = inter.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        return self._one_type_line_geom_to_instances(results)\n\n    def line_geoms_to_vectors(self, line_geom):\n        line_vectors_dict = dict()\n        for line_type, a_type_of_lines in line_geom:\n            one_type_vectors = self._one_type_line_geom_to_vectors(a_type_of_lines)\n            line_vectors_dict[line_type] = one_type_vectors\n\n        return line_vectors_dict\n    def line_geoms_to_instances(self, line_geom):\n        line_instances_dict = dict()\n        for line_type, a_type_of_lines in line_geom:\n            one_type_instances = self._one_type_line_geom_to_instances(a_type_of_lines)\n            
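# group the extracted instances by their source map layer; gen_vectorized_samples later maps each layer name to a label via CLASS2LABEL\n            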
line_instances_dict[line_type] = one_type_instances\n\n        return line_instances_dict\n\n    def ped_geoms_to_vectors(self, ped_geom):\n        ped_geom = ped_geom[0][1]\n        union_ped = ops.unary_union(ped_geom)\n        if union_ped.geom_type != 'MultiPolygon':\n            union_ped = MultiPolygon([union_ped])\n\n        max_x = self.patch_size[1] / 2\n        max_y = self.patch_size[0] / 2\n        local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n        results = []\n        for ped_poly in union_ped:\n            # rect = ped_poly.minimum_rotated_rectangle\n            ext = ped_poly.exterior\n            if not ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            results.append(lines)\n\n        return self._one_type_line_geom_to_vectors(results)\n\n    def get_contour_line(self,patch_box,patch_angle,layer_name,location, flip_dx, flip_dy):\n        if layer_name not in self.map_explorer[location].map_api.non_geometric_polygon_layers:\n            raise ValueError('{} is not a polygonal layer'.format(layer_name))\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.map_explorer[location].get_patch_coord(patch_box, patch_angle)\n\n        records = getattr(self.map_explorer[location].map_api, layer_name)\n\n        polygon_list = []\n        if layer_name == 'drivable_area':\n            for record in records:\n                polygons = [self.map_explorer[location].map_api.extract_polygon(polygon_token) for polygon_token in record['polygon_tokens']]\n\n                for polygon in polygons:\n                    new_polygon = polygon.intersection(patch)\n                    if not new_polygon.is_empty:\n                        new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                        a = 1. if not flip_dx else -1.\n                        e = 1. if not flip_dy else -1.\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [1, 0.0, 0.0, 1, -patch_x, -patch_y])\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [a, 0.0, 0.0, e, 0, 0])\n                        if new_polygon.geom_type == 'Polygon':\n                            new_polygon = MultiPolygon([new_polygon])\n                        polygon_list.append(new_polygon)\n\n        else:\n            for record in records:\n                polygon = self.map_explorer[location].map_api.extract_polygon(record['polygon_token'])\n\n                if polygon.is_valid:\n                    new_polygon = polygon.intersection(patch)\n                    if not new_polygon.is_empty:\n                        new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                        a = 1. if not flip_dx else -1.\n                        e = 1. 
if not flip_dy else -1.\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [1, 0.0, 0.0, 1, -patch_x, -patch_y])\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [a, 0.0, 0.0, e, 0, 0])\n                        if new_polygon.geom_type == 'Polygon':\n                            new_polygon = MultiPolygon([new_polygon])\n                        polygon_list.append(new_polygon)\n\n        return polygon_list\n\n    def get_divider_line(self,patch_box,patch_angle,layer_name,location, flip_dx, flip_dy):\n        if layer_name not in self.map_explorer[location].map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a line layer\".format(layer_name))\n\n        if layer_name == 'traffic_light':\n            return None\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.map_explorer[location].get_patch_coord(patch_box, patch_angle)\n\n        line_list = []\n        records = getattr(self.map_explorer[location].map_api, layer_name)\n        for record in records:\n            line = self.map_explorer[location].map_api.extract_line(record['line_token'])\n            if line.is_empty:  # Skip lines without nodes.\n                continue\n\n            new_line = line.intersection(patch)\n            if not new_line.is_empty:\n                new_line = affinity.rotate(new_line, -patch_angle, origin=(patch_x, patch_y), use_radians=False)\n                a = 1. if not flip_dx else -1.\n                e = 1. if not flip_dy else -1.\n                new_line = affinity.affine_transform(new_line,\n                                                     [1, 0.0, 0.0, 1, -patch_x, -patch_y])\n                new_line = affinity.affine_transform(new_line,\n                                                     [a, 0.0, 0.0, e, 0, 0])\n                # [a, b, d, e, xoff, yoff]\n                #                 which represents the augmented matrix::\n                #     [x']   / a  b xoff \\ [x]\n                #     [y'] = | d  e yoff | [y]\n                #     [1 ]   \\ 0  0   1  / [1]\n                # or the equations for the transformed coordinates::\n                #     x' = a * x + b * y + xoff\n                #     y' = d * x + e * y + yoff\n                line_list.append(new_line)\n\n        return line_list\n\n    def get_ped_crossing_line(self, patch_box, patch_angle, location, flip_dx, flip_dy):\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.map_explorer[location].get_patch_coord(patch_box, patch_angle)\n        polygon_list = []\n        records = getattr(self.map_explorer[location].map_api, 'ped_crossing')\n        # records = getattr(self.nusc_maps[location], 'ped_crossing')\n        for record in records:\n            polygon = self.map_explorer[location].map_api.extract_polygon(record['polygon_token'])\n            if polygon.is_valid:\n                new_polygon = polygon.intersection(patch)\n                if not new_polygon.is_empty:\n                    new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                    a = 1. if not flip_dx else -1.\n                    e = 1. 
if not flip_dy else -1.\n                    new_polygon = affinity.affine_transform(new_polygon,\n                                                            [1, 0.0, 0.0, 1, -patch_x, -patch_y])\n                    new_polygon = affinity.affine_transform(new_polygon,\n                                                            [a, 0.0, 0.0, e, 0, 0])\n                    if new_polygon.geom_type == 'Polygon':\n                        new_polygon = MultiPolygon([new_polygon])\n                    polygon_list.append(new_polygon)\n\n        return polygon_list\n\n    def sample_pts_from_line(self, line):\n        if self.fixed_num < 0:\n            distances = np.arange(0, line.length, self.sample_dist)\n            sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n        else:\n            # fixed number of points, so distance is line.length / self.fixed_num\n            distances = np.linspace(0, line.length, self.fixed_num)\n            sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)\n\n            # tmpdistances = np.linspace(0, line.length, 2)\n            # tmpsampled_points = np.array([list(line.interpolate(tmpdistance).coords) for tmpdistance in tmpdistances]).reshape(-1, 2)\n        # import pdb;pdb.set_trace()\n        # if self.normalize:\n        #     sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]])\n\n        num_valid = len(sampled_points)\n\n        if not self.padding or self.fixed_num > 0:\n            # fixed num sample can return now!\n            return sampled_points, num_valid\n\n        # fixed distance sampling need padding!\n        num_valid = len(sampled_points)\n\n        if self.fixed_num < 0:\n            if num_valid < self.num_samples:\n                padding = np.zeros((self.num_samples - len(sampled_points), 2))\n                sampled_points = np.concatenate([sampled_points, padding], axis=0)\n            else:\n                sampled_points = sampled_points[:self.num_samples, :]\n                num_valid = self.num_samples\n\n            # if self.normalize:\n            #     sampled_points = sampled_points / np.array([self.patch_size[1], self.patch_size[0]])\n            #     num_valid = len(sampled_points)\n\n        return sampled_points, num_valid\n"
  },
  {
    "path": "mmdet3d/datasets/waymo_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport tempfile\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.utils import print_log\n\nfrom ..core.bbox import Box3DMode, points_cam2img\nfrom .builder import DATASETS\nfrom .kitti_dataset import KittiDataset\n\n\n@DATASETS.register_module()\nclass WaymoDataset(KittiDataset):\n    \"\"\"Waymo Dataset.\n\n    This class serves as the API for experiments on the Waymo Dataset.\n\n    Please refer to `<https://waymo.com/open/download/>`_for data downloading.\n    It is recommended to symlink the dataset root to $MMDETECTION3D/data and\n    organize them as the doc shows.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        split (str): Split of input data.\n        pts_prefix (str, optional): Prefix of points files.\n            Defaults to 'velodyne'.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': box in LiDAR coordinates\n            - 'Depth': box in depth coordinates, usually for indoor dataset\n            - 'Camera': box in camera coordinates\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        pcd_limit_range (list(float), optional): The range of point cloud used\n            to filter invalid predicted boxes.\n            Default: [-85, -85, -5, 85, 85, 5].\n    \"\"\"\n\n    CLASSES = ('Car', 'Cyclist', 'Pedestrian')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 split,\n                 pts_prefix='velodyne',\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 load_interval=1,\n                 pcd_limit_range=[-85, -85, -5, 85, 85, 5],\n                 **kwargs):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            split=split,\n            pts_prefix=pts_prefix,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            pcd_limit_range=pcd_limit_range,\n            **kwargs)\n\n        # to load a subset, just set the load_interval in the dataset config\n        self.data_infos = self.data_infos[::load_interval]\n        if hasattr(self, 'flag'):\n            self.flag = self.flag[::load_interval]\n\n    def _get_pts_filename(self, idx):\n        pts_filename = osp.join(self.root_split, self.pts_prefix,\n                                f'{idx:07d}.bin')\n        return pts_filename\n\n    def 
get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Standard input_dict consists of the\n                data information.\n\n                - sample_idx (str): sample index\n                - pts_filename (str): filename of point clouds\n                - img_prefix (str): prefix of image files\n                - img_info (dict): image info\n                - lidar2img (list[np.ndarray], optional): transformations from\n                    lidar to different cameras\n                - ann_info (dict): annotation info\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['image']['image_idx']\n        img_filename = os.path.join(self.data_root,\n                                    info['image']['image_path'])\n\n        # TODO: consider use torch.Tensor only\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P0 = info['calib']['P0'].astype(np.float32)\n        lidar2img = P0 @ rect @ Trv2c\n\n        pts_filename = self._get_pts_filename(sample_idx)\n        input_dict = dict(\n            sample_idx=sample_idx,\n            pts_filename=pts_filename,\n            img_prefix=None,\n            img_info=dict(filename=img_filename),\n            lidar2img=lidar2img)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None,\n                       data_format='waymo'):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str): The prefix of submitted files. It\n                includes the file path and the prefix of filename, e.g.,\n                \"a/b/prefix\". If not specified, a temp file will be created.\n                Default: None.\n            data_format (str, optional): Output data format.\n                Default: 'waymo'. 
Another supported choice is 'kitti'.\n\n        Returns:\n            tuple: (result_files, tmp_dir), result_files is a dict containing\n                the json filepaths, tmp_dir is the temporal directory created\n                for saving json files when jsonfile_prefix is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        assert ('waymo' in data_format or 'kitti' in data_format), \\\n            f'invalid data_format {data_format}'\n\n        if (not isinstance(outputs[0], dict)) or 'img_bbox' in outputs[0]:\n            raise TypeError('Not supported type for reformat results.')\n        elif 'pts_bbox' in outputs[0]:\n            result_files = dict()\n            for name in outputs[0]:\n                results_ = [out[name] for out in outputs]\n                pklfile_prefix_ = pklfile_prefix + name\n                if submission_prefix is not None:\n                    submission_prefix_ = f'{submission_prefix}_{name}'\n                else:\n                    submission_prefix_ = None\n                result_files_ = self.bbox2result_kitti(results_, self.CLASSES,\n                                                       pklfile_prefix_,\n                                                       submission_prefix_)\n                result_files[name] = result_files_\n        else:\n            result_files = self.bbox2result_kitti(outputs, self.CLASSES,\n                                                  pklfile_prefix,\n                                                  submission_prefix)\n        if 'waymo' in data_format:\n            from ..core.evaluation.waymo_utils.prediction_kitti_to_waymo import \\\n                KITTI2Waymo  # noqa\n            waymo_root = osp.join(\n                self.data_root.split('kitti_format')[0], 'waymo_format')\n            if self.split == 'training':\n                waymo_tfrecords_dir = osp.join(waymo_root, 'validation')\n                prefix = '1'\n            elif self.split == 'testing':\n                waymo_tfrecords_dir = osp.join(waymo_root, 'testing')\n                prefix = '2'\n            else:\n                raise ValueError('Not supported split value.')\n            save_tmp_dir = tempfile.TemporaryDirectory()\n            waymo_results_save_dir = save_tmp_dir.name\n            waymo_results_final_path = f'{pklfile_prefix}.bin'\n            if 'pts_bbox' in result_files:\n                converter = KITTI2Waymo(result_files['pts_bbox'],\n                                        waymo_tfrecords_dir,\n                                        waymo_results_save_dir,\n                                        waymo_results_final_path, prefix)\n            else:\n                converter = KITTI2Waymo(result_files, waymo_tfrecords_dir,\n                                        waymo_results_save_dir,\n                                        waymo_results_final_path, prefix)\n            converter.convert()\n            save_tmp_dir.cleanup()\n\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric='waymo',\n                 logger=None,\n                 pklfile_prefix=None,\n                 submission_prefix=None,\n                 show=False,\n                 out_dir=None,\n                 pipeline=None):\n        \"\"\"Evaluation in KITTI protocol.\n\n        Args:\n            
results (list[dict]): Testing results of the dataset.\n            metric (str | list[str], optional): Metrics to be evaluated.\n                Default: 'waymo'. Another supported metric is 'kitti'.\n            logger (logging.Logger | str, optional): Logger used for printing\n                related information during evaluation. Default: None.\n            pklfile_prefix (str, optional): The prefix of pkl files including\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str, optional): The prefix of submission data.\n                If not specified, the submission data will not be generated.\n            show (bool, optional): Whether to visualize.\n                Default: False.\n            out_dir (str, optional): Path to save the visualization results.\n                Default: None.\n            pipeline (list[dict], optional): raw data loading for showing.\n                Default: None.\n\n        Returns:\n            dict[str: float]: results of each evaluation metric\n        \"\"\"\n        assert ('waymo' in metric or 'kitti' in metric), \\\n            f'invalid metric {metric}'\n        if 'kitti' in metric:\n            result_files, tmp_dir = self.format_results(\n                results,\n                pklfile_prefix,\n                submission_prefix,\n                data_format='kitti')\n            from mmdet3d.core.evaluation import kitti_eval\n            gt_annos = [info['annos'] for info in self.data_infos]\n\n            if isinstance(result_files, dict):\n                ap_dict = dict()\n                for name, result_files_ in result_files.items():\n                    eval_types = ['bev', '3d']\n                    ap_result_str, ap_dict_ = kitti_eval(\n                        gt_annos,\n                        result_files_,\n                        self.CLASSES,\n                        eval_types=eval_types)\n                    for ap_type, ap in ap_dict_.items():\n                        ap_dict[f'{name}/{ap_type}'] = float(\n                            '{:.4f}'.format(ap))\n\n                    print_log(\n                        f'Results of {name}:\\n' + ap_result_str, logger=logger)\n\n            else:\n                ap_result_str, ap_dict = kitti_eval(\n                    gt_annos,\n                    result_files,\n                    self.CLASSES,\n                    eval_types=['bev', '3d'])\n                print_log('\\n' + ap_result_str, logger=logger)\n        if 'waymo' in metric:\n            waymo_root = osp.join(\n                self.data_root.split('kitti_format')[0], 'waymo_format')\n            if pklfile_prefix is None:\n                eval_tmp_dir = tempfile.TemporaryDirectory()\n                pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')\n            else:\n                eval_tmp_dir = None\n            result_files, tmp_dir = self.format_results(\n                results,\n                pklfile_prefix,\n                submission_prefix,\n                data_format='waymo')\n            import subprocess\n            ret_bytes = subprocess.check_output(\n                'mmdet3d/core/evaluation/waymo_utils/' +\n                f'compute_detection_metrics_main {pklfile_prefix}.bin ' +\n                f'{waymo_root}/gt.bin',\n                shell=True)\n            ret_texts = ret_bytes.decode('utf-8')\n            print_log(ret_texts)\n            # parse 
the text to get ap_dict\n            ap_dict = {\n                'Vehicle/L1 mAP': 0,\n                'Vehicle/L1 mAPH': 0,\n                'Vehicle/L2 mAP': 0,\n                'Vehicle/L2 mAPH': 0,\n                'Pedestrian/L1 mAP': 0,\n                'Pedestrian/L1 mAPH': 0,\n                'Pedestrian/L2 mAP': 0,\n                'Pedestrian/L2 mAPH': 0,\n                'Sign/L1 mAP': 0,\n                'Sign/L1 mAPH': 0,\n                'Sign/L2 mAP': 0,\n                'Sign/L2 mAPH': 0,\n                'Cyclist/L1 mAP': 0,\n                'Cyclist/L1 mAPH': 0,\n                'Cyclist/L2 mAP': 0,\n                'Cyclist/L2 mAPH': 0,\n                'Overall/L1 mAP': 0,\n                'Overall/L1 mAPH': 0,\n                'Overall/L2 mAP': 0,\n                'Overall/L2 mAPH': 0\n            }\n            mAP_splits = ret_texts.split('mAP ')\n            mAPH_splits = ret_texts.split('mAPH ')\n            for idx, key in enumerate(ap_dict.keys()):\n                split_idx = int(idx / 2) + 1\n                if idx % 2 == 0:  # mAP\n                    ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])\n                else:  # mAPH\n                    ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])\n            ap_dict['Overall/L1 mAP'] = \\\n                (ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +\n                 ap_dict['Cyclist/L1 mAP']) / 3\n            ap_dict['Overall/L1 mAPH'] = \\\n                (ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +\n                 ap_dict['Cyclist/L1 mAPH']) / 3\n            ap_dict['Overall/L2 mAP'] = \\\n                (ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +\n                 ap_dict['Cyclist/L2 mAP']) / 3\n            ap_dict['Overall/L2 mAPH'] = \\\n                (ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +\n                 ap_dict['Cyclist/L2 mAPH']) / 3\n            if eval_tmp_dir is not None:\n                eval_tmp_dir.cleanup()\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show or out_dir:\n            self.show(results, out_dir, show=show, pipeline=pipeline)\n        return ap_dict\n\n    def bbox2result_kitti(self,\n                          net_outputs,\n                          class_names,\n                          pklfile_prefix=None,\n                          submission_prefix=None):\n        \"\"\"Convert results to kitti format for evaluation and test submission.\n\n        Args:\n            net_outputs (List[np.ndarray]): list of array storing the\n                bbox and score\n            class_nanes (List[String]): A list of class names\n            pklfile_prefix (str): The prefix of pkl file.\n            submission_prefix (str): The prefix of submission file.\n\n        Returns:\n            List[dict]: A list of dict have the kitti 3d format\n        \"\"\"\n        assert len(net_outputs) == len(self.data_infos), \\\n            'invalid list length of network outputs'\n        if submission_prefix is not None:\n            mmcv.mkdir_or_exist(submission_prefix)\n\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for idx, pred_dicts in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            info = self.data_infos[idx]\n            sample_idx = info['image']['image_idx']\n            image_shape = info['image']['image_shape'][:2]\n\n            box_dict = 
self.convert_valid_bboxes(pred_dicts, info)\n            if len(box_dict['bbox']) > 0:\n                box_2d_preds = box_dict['bbox']\n                box_preds = box_dict['box3d_camera']\n                scores = box_dict['scores']\n                box_preds_lidar = box_dict['box3d_lidar']\n                label_preds = box_dict['label_preds']\n\n                anno = {\n                    'name': [],\n                    'truncated': [],\n                    'occluded': [],\n                    'alpha': [],\n                    'bbox': [],\n                    'dimensions': [],\n                    'location': [],\n                    'rotation_y': [],\n                    'score': []\n                }\n\n                for box, box_lidar, bbox, score, label in zip(\n                        box_preds, box_preds_lidar, box_2d_preds, scores,\n                        label_preds):\n                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])\n                    bbox[:2] = np.maximum(bbox[:2], [0, 0])\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(\n                        -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])\n                    anno['bbox'].append(bbox)\n                    anno['dimensions'].append(box[3:6])\n                    anno['location'].append(box[:3])\n                    anno['rotation_y'].append(box[6])\n                    anno['score'].append(score)\n\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n\n                if submission_prefix is not None:\n                    curr_file = f'{submission_prefix}/{sample_idx:07d}.txt'\n                    with open(curr_file, 'w') as f:\n                        bbox = anno['bbox']\n                        loc = anno['location']\n                        dims = anno['dimensions']  # lhw -> hwl\n\n                        for idx in range(len(bbox)):\n                            print(\n                                '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '\n                                '{:.4f} {:.4f} {:.4f} '\n                                '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.\n                                format(anno['name'][idx], anno['alpha'][idx],\n                                       bbox[idx][0], bbox[idx][1],\n                                       bbox[idx][2], bbox[idx][3],\n                                       dims[idx][1], dims[idx][2],\n                                       dims[idx][0], loc[idx][0], loc[idx][1],\n                                       loc[idx][2], anno['rotation_y'][idx],\n                                       anno['score'][idx]),\n                                file=f)\n            else:\n                annos.append({\n                    'name': np.array([]),\n                    'truncated': np.array([]),\n                    'occluded': np.array([]),\n                    'alpha': np.array([]),\n                    'bbox': np.zeros([0, 4]),\n                    'dimensions': np.zeros([0, 3]),\n                    'location': np.zeros([0, 3]),\n                    'rotation_y': np.array([]),\n                    'score': np.array([]),\n                })\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)\n\n            det_annos += annos\n\n        if pklfile_prefix is not 
None:\n            if not pklfile_prefix.endswith(('.pkl', '.pickle')):\n                out = f'{pklfile_prefix}.pkl'\n            mmcv.dump(det_annos, out)\n            print(f'Result is saved to {out}.')\n\n        return det_annos\n\n    def convert_valid_bboxes(self, box_dict, info):\n        \"\"\"Convert the boxes into valid format.\n\n        Args:\n            box_dict (dict): Bounding boxes to be converted.\n\n                - boxes_3d (:obj:``LiDARInstance3DBoxes``): 3D bounding boxes.\n                - scores_3d (np.ndarray): Scores of predicted boxes.\n                - labels_3d (np.ndarray): Class labels of predicted boxes.\n            info (dict): Dataset information dictionary.\n\n        Returns:\n            dict: Valid boxes after conversion.\n\n                - bbox (np.ndarray): 2D bounding boxes (in camera 0).\n                - box3d_camera (np.ndarray): 3D boxes in camera coordinates.\n                - box3d_lidar (np.ndarray): 3D boxes in lidar coordinates.\n                - scores (np.ndarray): Scores of predicted boxes.\n                - label_preds (np.ndarray): Class labels of predicted boxes.\n                - sample_idx (np.ndarray): Sample index.\n        \"\"\"\n        # TODO: refactor this function\n        box_preds = box_dict['boxes_3d']\n        scores = box_dict['scores_3d']\n        labels = box_dict['labels_3d']\n        sample_idx = info['image']['image_idx']\n        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)\n\n        if len(box_preds) == 0:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P0 = info['calib']['P0'].astype(np.float32)\n        P0 = box_preds.tensor.new_tensor(P0)\n\n        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)\n\n        box_corners = box_preds_camera.corners\n        box_corners_in_image = points_cam2img(box_corners, P0)\n        # box_corners_in_image: [N, 8, 2]\n        minxy = torch.min(box_corners_in_image, dim=1)[0]\n        maxxy = torch.max(box_corners_in_image, dim=1)[0]\n        box_2d_preds = torch.cat([minxy, maxxy], dim=1)\n        # Post-processing\n        # check box_preds\n        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)\n        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &\n                          (box_preds.center < limit_range[3:]))\n        valid_inds = valid_pcd_inds.all(-1)\n\n        if valid_inds.sum() > 0:\n            return dict(\n                bbox=box_2d_preds[valid_inds, :].numpy(),\n                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),\n                box3d_lidar=box_preds[valid_inds].tensor.numpy(),\n                scores=scores[valid_inds].numpy(),\n                label_preds=labels[valid_inds].numpy(),\n                sample_idx=sample_idx,\n            )\n        else:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx,\n            )\n"
  },
  {
    "path": "mmdet3d/models/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .backbones import *  # noqa: F401,F403\nfrom .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES,\n                      MIDDLE_ENCODERS, NECKS, ROI_EXTRACTORS, SEGMENTORS,\n                      SHARED_HEADS, VOXEL_ENCODERS, build_backbone,\n                      build_detector, build_fusion_layer, build_head,\n                      build_loss, build_middle_encoder, build_model,\n                      build_neck, build_roi_extractor, build_shared_head,\n                      build_voxel_encoder)\nfrom .decode_heads import *  # noqa: F401,F403\nfrom .dense_heads import *  # noqa: F401,F403\nfrom .detectors import *  # noqa: F401,F403\nfrom .fusion_layers import *  # noqa: F401,F403\nfrom .losses import *  # noqa: F401,F403\nfrom .middle_encoders import *  # noqa: F401,F403\nfrom .model_utils import *  # noqa: F401,F403\nfrom .necks import *  # noqa: F401,F403\nfrom .roi_heads import *  # noqa: F401,F403\nfrom .segmentors import *  # noqa: F401,F403\nfrom .voxel_encoders import *  # noqa: F401,F403\nfrom .fbbev import *\n\n__all__ = [\n    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',\n    'DETECTORS', 'SEGMENTORS', 'VOXEL_ENCODERS', 'MIDDLE_ENCODERS',\n    'FUSION_LAYERS', 'build_backbone', 'build_neck', 'build_roi_extractor',\n    'build_shared_head', 'build_head', 'build_loss', 'build_detector',\n    'build_fusion_layer', 'build_model', 'build_middle_encoder',\n    'build_voxel_encoder'\n]\n"
  },
  {
    "path": "mmdet3d/models/backbones/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt\nfrom .dgcnn import DGCNNBackbone\nfrom .dla import DLANet\nfrom .mink_resnet import MinkResNet\nfrom .multi_backbone import MultiBackbone\nfrom .nostem_regnet import NoStemRegNet\nfrom .pointnet2_sa_msg import PointNet2SAMSG\nfrom .pointnet2_sa_ssg import PointNet2SASSG\nfrom .resnet import CustomResNet\nfrom .second import SECOND\nfrom .convnext import ConvNeXt\nfrom .vovnet import VoVNetCP\nfrom .swin import SwinTransformer\n__all__ = [\n    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet',\n    'SECOND', 'DGCNNBackbone', 'PointNet2SASSG', 'PointNet2SAMSG',\n    'MultiBackbone', 'DLANet', 'MinkResNet', 'CustomResNet'\n]\n"
  },
  {
    "path": "mmdet3d/models/backbones/base_pointnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom abc import ABCMeta\n\nfrom mmcv.runner import BaseModule\n\n\nclass BasePointNet(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for PointNet.\"\"\"\n\n    def __init__(self, init_cfg=None, pretrained=None):\n        super(BasePointNet, self).__init__(init_cfg)\n        self.fp16_enabled = False\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be setting at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is a deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n\n    @staticmethod\n    def _split_point_feats(points):\n        \"\"\"Split coordinates and features of input points.\n\n        Args:\n            points (torch.Tensor): Point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n        \"\"\"\n        xyz = points[..., 0:3].contiguous()\n        if points.size(-1) > 3:\n            features = points[..., 3:].transpose(1, 2).contiguous()\n        else:\n            features = None\n\n        return xyz, features\n"
  },
  {
    "path": "mmdet3d/models/backbones/convnext.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom functools import partial\nfrom itertools import chain\nfrom typing import Sequence\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn.bricks import (NORM_LAYERS, DropPath, build_activation_layer,\n                             build_norm_layer)\nfrom mmcv.runner import BaseModule\nfrom mmcv.runner.base_module import ModuleList, Sequential\n\nfrom mmdet.models.builder import BACKBONES\n\n# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.runner import BaseModule\n\n\nclass BaseBackbone(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base backbone.\n    This class defines the basic functions of a backbone. Any backbone that\n    inherits this class should at least define its own `forward` function.\n    \"\"\"\n\n    def __init__(self, init_cfg=None):\n        super(BaseBackbone, self).__init__(init_cfg)\n\n    @abstractmethod\n    def forward(self, x):\n        \"\"\"Forward computation.\n        Args:\n            x (tensor | tuple[tensor]): x could be a Torch.tensor or a tuple of\n                Torch.tensor, containing input data for forward computation.\n        \"\"\"\n        pass\n\n    def train(self, mode=True):\n        \"\"\"Set module status before forward computation.\n        Args:\n            mode (bool): Whether it is train_mode or test_mode\n        \"\"\"\n        super(BaseBackbone, self).train(mode)\n\n\n@NORM_LAYERS.register_module('LN2d')\nclass LayerNorm2d(nn.LayerNorm):\n    \"\"\"LayerNorm on channels for 2d images.\n    Args:\n        num_channels (int): The number of channels of the input tensor.\n        eps (float): a value added to the denominator for numerical stability.\n            Defaults to 1e-5.\n        elementwise_affine (bool): a boolean value that when set to ``True``,\n            this module has learnable per-element affine parameters initialized\n            to ones (for weights) and zeros (for biases). Defaults to True.\n    \"\"\"\n\n    def __init__(self, num_channels: int, **kwargs) -> None:\n        super().__init__(num_channels, **kwargs)\n        self.num_channels = self.normalized_shape[0]\n\n    def forward(self, x):\n        assert x.dim() == 4, 'LayerNorm2d only supports inputs with shape ' \\\n            f'(N, C, H, W), but got tensor with shape {x.shape}'\n        return F.layer_norm(\n            x.permute(0, 2, 3, 1).contiguous(), self.normalized_shape,\n            self.weight, self.bias, self.eps).permute(0, 3, 1, 2).contiguous()\n\n\nclass ConvNeXtBlock(BaseModule):\n    \"\"\"ConvNeXt Block.\n    Args:\n        in_channels (int): The number of input channels.\n        norm_cfg (dict): The config dict for norm layers.\n            Defaults to ``dict(type='LN2d', eps=1e-6)``.\n        act_cfg (dict): The config dict for activation between pointwise\n            convolution. Defaults to ``dict(type='GELU')``.\n        mlp_ratio (float): The expansion ratio in both pointwise convolution.\n            Defaults to 4.\n        linear_pw_conv (bool): Whether to use linear layer to do pointwise\n            convolution. More details can be found in the note.\n            Defaults to True.\n        drop_path_rate (float): Stochastic depth rate. Defaults to 0.\n        layer_scale_init_value (float): Init value for Layer Scale.\n            Defaults to 1e-6.\n    Note:\n        There are two equivalent implementations:\n        1. 
DwConv -> LayerNorm -> 1x1 Conv -> GELU -> 1x1 Conv;\n           all outputs are in (N, C, H, W).\n        2. DwConv -> LayerNorm -> Permute to (N, H, W, C) -> Linear -> GELU\n           -> Linear; Permute back\n        As default, we use the second to align with the official repository.\n        And it may be slightly faster.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 norm_cfg=dict(type='LN2d', eps=1e-6),\n                 act_cfg=dict(type='GELU'),\n                 mlp_ratio=4.,\n                 linear_pw_conv=True,\n                 drop_path_rate=0.,\n                 layer_scale_init_value=1e-6,\n                 with_cp=False):\n        super().__init__()\n        self.with_cp = with_cp\n\n        self.depthwise_conv = nn.Conv2d(\n            in_channels,\n            in_channels,\n            kernel_size=7,\n            padding=3,\n            groups=in_channels)\n\n        self.linear_pw_conv = linear_pw_conv\n        self.norm = build_norm_layer(norm_cfg, in_channels)[1]\n\n        mid_channels = int(mlp_ratio * in_channels)\n        if self.linear_pw_conv:\n            # Use linear layer to do pointwise conv.\n            pw_conv = nn.Linear\n        else:\n            pw_conv = partial(nn.Conv2d, kernel_size=1)\n\n        self.pointwise_conv1 = pw_conv(in_channels, mid_channels)\n        self.act = build_activation_layer(act_cfg)\n        self.pointwise_conv2 = pw_conv(mid_channels, in_channels)\n\n        self.gamma = nn.Parameter(\n            layer_scale_init_value * torch.ones((in_channels)),\n            requires_grad=True) if layer_scale_init_value > 0 else None\n\n        self.drop_path = DropPath(\n            drop_path_rate) if drop_path_rate > 0. else nn.Identity()\n\n    def forward(self, x):\n\n        def _inner_forward(x):\n            shortcut = x\n            x = self.depthwise_conv(x)\n            x = self.norm(x)\n\n            if self.linear_pw_conv:\n                x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)\n\n            x = self.pointwise_conv1(x)\n            x = self.act(x)\n            x = self.pointwise_conv2(x)\n\n            if self.linear_pw_conv:\n                x = x.permute(0, 3, 1, 2)  # permute back\n\n            if self.gamma is not None:\n                x = x.mul(self.gamma.view(1, -1, 1, 1))\n\n            x = shortcut + self.drop_path(x)\n            return x\n\n        if self.with_cp and x.requires_grad:\n            x = cp.checkpoint(_inner_forward, x)\n        else:\n            x = _inner_forward(x)\n\n        return x\n\n\n@BACKBONES.register_module()\nclass ConvNeXt(BaseBackbone):\n    \"\"\"ConvNeXt.\n    A PyTorch implementation of : `A ConvNet for the 2020s\n    <https://arxiv.org/pdf/2201.03545.pdf>`_\n    Modified from the `official repo\n    <https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py>`_\n    and `timm\n    <https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/convnext.py>`_.\n    Args:\n        arch (str | dict): The model's architecture. If string, it should be\n            one of architecture in ``ConvNeXt.arch_settings``. And if dict, it\n            should include the following two keys:\n            - depths (list[int]): Number of blocks at each stage.\n            - channels (list[int]): The number of channels at each stage.\n            Defaults to 'tiny'.\n        in_channels (int): Number of input image channels. 
Defaults to 3.\n        stem_patch_size (int): The size of one patch in the stem layer.\n            Defaults to 4.\n        norm_cfg (dict): The config dict for norm layers.\n            Defaults to ``dict(type='LN2d', eps=1e-6)``.\n        act_cfg (dict): The config dict for activation between pointwise\n            convolution. Defaults to ``dict(type='GELU')``.\n        linear_pw_conv (bool): Whether to use linear layer to do pointwise\n            convolution. Defaults to True.\n        drop_path_rate (float): Stochastic depth rate. Defaults to 0.\n        layer_scale_init_value (float): Init value for Layer Scale.\n            Defaults to 1e-6.\n        out_indices (Sequence | int): Output from which stages.\n            Defaults to -1, means the last stage.\n        frozen_stages (int): Stages to be frozen (all param fixed).\n            Defaults to 0, which means not freezing any parameters.\n        gap_before_final_norm (bool): Whether to globally average the feature\n            map before the final norm layer. In the official repo, it's only\n            used in classification task. Defaults to True.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed. Defaults to False.\n        init_cfg (dict, optional): Initialization config dict\n    \"\"\"  # noqa: E501\n    arch_settings = {\n        'tiny': {\n            'depths': [3, 3, 9, 3],\n            'channels': [96, 192, 384, 768]\n        },\n        'small': {\n            'depths': [3, 3, 27, 3],\n            'channels': [96, 192, 384, 768]\n        },\n        'base': {\n            'depths': [3, 3, 27, 3],\n            'channels': [128, 256, 512, 1024]\n        },\n        'large': {\n            'depths': [3, 3, 27, 3],\n            'channels': [192, 384, 768, 1536]\n        },\n        'xlarge': {\n            'depths': [3, 3, 27, 3],\n            'channels': [256, 512, 1024, 2048]\n        },\n    }\n\n    def __init__(self,\n                 arch='tiny',\n                 in_channels=3,\n                 stem_patch_size=4,\n                 norm_cfg=dict(type='LN2d', eps=1e-6),\n                 act_cfg=dict(type='GELU'),\n                 linear_pw_conv=True,\n                 drop_path_rate=0.,\n                 layer_scale_init_value=1e-6,\n                 out_indices=-1,\n                 frozen_stages=0,\n                 gap_before_final_norm=True,\n                 with_cp=False,\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n\n        if isinstance(arch, str):\n            assert arch in self.arch_settings, \\\n                f'Unavailable arch, please choose from ' \\\n                f'({set(self.arch_settings)}) or pass a dict.'\n            arch = self.arch_settings[arch]\n        elif isinstance(arch, dict):\n            assert 'depths' in arch and 'channels' in arch, \\\n                f'The arch dict must have \"depths\" and \"channels\", ' \\\n                f'but got {list(arch.keys())}.'\n\n        self.depths = arch['depths']\n        self.channels = arch['channels']\n        assert (isinstance(self.depths, Sequence)\n                and isinstance(self.channels, Sequence)\n                and len(self.depths) == len(self.channels)), \\\n            f'The \"depths\" ({self.depths}) and \"channels\" ({self.channels}) ' \\\n            'should be both sequence with the same length.'\n\n        self.num_stages = len(self.depths)\n\n        if isinstance(out_indices, int):\n           
 out_indices = [out_indices]\n        assert isinstance(out_indices, Sequence), \\\n            f'\"out_indices\" must by a sequence or int, ' \\\n            f'get {type(out_indices)} instead.'\n        for i, index in enumerate(out_indices):\n            if index < 0:\n                out_indices[i] = 4 + index\n                assert out_indices[i] >= 0, f'Invalid out_indices {index}'\n        self.out_indices = out_indices\n\n        self.frozen_stages = frozen_stages\n        self.gap_before_final_norm = gap_before_final_norm\n\n        # stochastic depth decay rule\n        dpr = [\n            x.item()\n            for x in torch.linspace(0, drop_path_rate, sum(self.depths))\n        ]\n        block_idx = 0\n\n        # 4 downsample layers between stages, including the stem layer.\n        self.downsample_layers = ModuleList()\n        stem = nn.Sequential(\n            nn.Conv2d(\n                in_channels,\n                self.channels[0],\n                kernel_size=stem_patch_size,\n                stride=stem_patch_size),\n            build_norm_layer(norm_cfg, self.channels[0])[1],\n        )\n        self.downsample_layers.append(stem)\n\n        # 4 feature resolution stages, each consisting of multiple residual\n        # blocks\n        self.stages = nn.ModuleList()\n\n        for i in range(self.num_stages):\n            depth = self.depths[i]\n            channels = self.channels[i]\n\n            if i >= 1:\n                downsample_layer = nn.Sequential(\n                    LayerNorm2d(self.channels[i - 1]),\n                    nn.Conv2d(\n                        self.channels[i - 1],\n                        channels,\n                        kernel_size=2,\n                        stride=2),\n                )\n                self.downsample_layers.append(downsample_layer)\n\n            stage = Sequential(*[\n                ConvNeXtBlock(\n                    in_channels=channels,\n                    drop_path_rate=dpr[block_idx + j],\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    linear_pw_conv=linear_pw_conv,\n                    layer_scale_init_value=layer_scale_init_value,\n                    with_cp=with_cp) for j in range(depth)\n            ])\n            block_idx += depth\n\n            self.stages.append(stage)\n\n            if i in self.out_indices:\n                norm_layer = build_norm_layer(norm_cfg, channels)[1]\n                self.add_module(f'norm{i}', norm_layer)\n\n        self._freeze_stages()\n\n    def forward(self, x):\n        outs = []\n        for i, stage in enumerate(self.stages):\n            x = self.downsample_layers[i](x)\n            x = stage(x)\n            # x = cp.checkpoint(stage, x)\n            if i in self.out_indices:\n                norm_layer = getattr(self, f'norm{i}')\n                if self.gap_before_final_norm:\n                    gap = x.mean([-2, -1], keepdim=True)\n                    outs.append(norm_layer(gap).flatten(1))\n                else:\n                    # The output of LayerNorm2d may be discontiguous, which\n                    # may cause some problem in the downstream tasks\n                    outs.append(norm_layer(x).contiguous())\n\n        return tuple(outs)\n\n    def _freeze_stages(self):\n        for i in range(self.frozen_stages):\n            downsample_layer = self.downsample_layers[i]\n            stage = self.stages[i]\n            downsample_layer.eval()\n            stage.eval()\n            for param in 
chain(downsample_layer.parameters(),\n                               stage.parameters()):\n                param.requires_grad = False\n\n    def train(self, mode=True):\n        super(ConvNeXt, self).train(mode)\n        self._freeze_stages()\n"
  },
  {
    "path": "mmdet3d/models/backbones/dgcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.runner import BaseModule, auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import DGCNNFAModule, DGCNNGFModule\nfrom ..builder import BACKBONES\n\n\n@BACKBONES.register_module()\nclass DGCNNBackbone(BaseModule):\n    \"\"\"Backbone network for DGCNN.\n\n    Args:\n        in_channels (int): Input channels of point cloud.\n        num_samples (tuple[int], optional): The number of samples for knn or\n            ball query in each graph feature (GF) module.\n            Defaults to (20, 20, 20).\n        knn_modes (tuple[str], optional): Mode of KNN of each knn module.\n            Defaults to ('D-KNN', 'F-KNN', 'F-KNN').\n        radius (tuple[float], optional): Sampling radii of each GF module.\n            Defaults to (None, None, None).\n        gf_channels (tuple[tuple[int]], optional): Out channels of each mlp in\n            GF module. Defaults to ((64, 64), (64, 64), (64, )).\n        fa_channels (tuple[int], optional): Out channels of each mlp in FA\n            module. Defaults to (1024, ).\n        act_cfg (dict, optional): Config of activation layer.\n            Defaults to dict(type='ReLU').\n        init_cfg (dict, optional): Initialization config.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_samples=(20, 20, 20),\n                 knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),\n                 radius=(None, None, None),\n                 gf_channels=((64, 64), (64, 64), (64, )),\n                 fa_channels=(1024, ),\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.num_gf = len(gf_channels)\n\n        assert len(num_samples) == len(knn_modes) == len(radius) == len(\n            gf_channels), 'Num_samples, knn_modes, radius and gf_channels \\\n            should have the same length.'\n\n        self.GF_modules = nn.ModuleList()\n        gf_in_channel = in_channels * 2\n        skip_channel_list = [gf_in_channel]  # input channel list\n\n        for gf_index in range(self.num_gf):\n            cur_gf_mlps = list(gf_channels[gf_index])\n            cur_gf_mlps = [gf_in_channel] + cur_gf_mlps\n            gf_out_channel = cur_gf_mlps[-1]\n\n            self.GF_modules.append(\n                DGCNNGFModule(\n                    mlp_channels=cur_gf_mlps,\n                    num_sample=num_samples[gf_index],\n                    knn_mode=knn_modes[gf_index],\n                    radius=radius[gf_index],\n                    act_cfg=act_cfg))\n            skip_channel_list.append(gf_out_channel)\n            gf_in_channel = gf_out_channel * 2\n\n        fa_in_channel = sum(skip_channel_list[1:])\n        cur_fa_mlps = list(fa_channels)\n        cur_fa_mlps = [fa_in_channel] + cur_fa_mlps\n\n        self.FA_module = DGCNNFAModule(\n            mlp_channels=cur_fa_mlps, act_cfg=act_cfg)\n\n    @auto_fp16(apply_to=('points', ))\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): point coordinates with features,\n                with shape (B, N, in_channels).\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Outputs after graph feature (GF) and\n                feature aggregation (FA) modules.\n\n                - gf_points (list[torch.Tensor]): Outputs after each GF module.\n                - fa_points (torch.Tensor): Outputs after FA module.\n        \"\"\"\n        
gf_points = [points]\n\n        for i in range(self.num_gf):\n            cur_points = self.GF_modules[i](gf_points[i])\n            gf_points.append(cur_points)\n\n        fa_points = self.FA_module(gf_points)\n\n        out = dict(gf_points=gf_points, fa_points=fa_points)\n        return out\n"
  },
  {
    "path": "mmdet3d/models/backbones/dla.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn\n\nfrom ..builder import BACKBONES\n\n\ndef dla_build_norm_layer(cfg, num_features):\n    \"\"\"Build normalization layer specially designed for DLANet.\n\n    Args:\n        cfg (dict): The norm layer config, which should contain:\n\n            - type (str): Layer type.\n            - layer args: Args needed to instantiate a norm layer.\n            - requires_grad (bool, optional): Whether stop gradient updates.\n        num_features (int): Number of input channels.\n\n\n    Returns:\n        Function: Build normalization layer in mmcv.\n    \"\"\"\n    cfg_ = cfg.copy()\n    if cfg_['type'] == 'GN':\n        if num_features % 32 == 0:\n            return build_norm_layer(cfg_, num_features)\n        else:\n            assert 'num_groups' in cfg_\n            cfg_['num_groups'] = cfg_['num_groups'] // 2\n            return build_norm_layer(cfg_, num_features)\n    else:\n        return build_norm_layer(cfg_, num_features)\n\n\nclass BasicBlock(BaseModule):\n    \"\"\"BasicBlock in DLANet.\n\n    Args:\n        in_channels (int): Input feature channel.\n        out_channels (int): Output feature channel.\n        norm_cfg (dict): Dictionary to construct and config\n            norm layer.\n        conv_cfg (dict): Dictionary to construct and config\n            conv layer.\n        stride (int, optional): Conv stride. Default: 1.\n        dilation (int, optional): Conv dilation. Default: 1.\n        init_cfg (dict, optional): Initialization config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 norm_cfg,\n                 conv_cfg,\n                 stride=1,\n                 dilation=1,\n                 init_cfg=None):\n        super(BasicBlock, self).__init__(init_cfg)\n        self.conv1 = build_conv_layer(\n            conv_cfg,\n            in_channels,\n            out_channels,\n            3,\n            stride=stride,\n            padding=dilation,\n            dilation=dilation,\n            bias=False)\n        self.norm1 = dla_build_norm_layer(norm_cfg, out_channels)[1]\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = build_conv_layer(\n            conv_cfg,\n            out_channels,\n            out_channels,\n            3,\n            stride=1,\n            padding=dilation,\n            dilation=dilation,\n            bias=False)\n        self.norm2 = dla_build_norm_layer(norm_cfg, out_channels)[1]\n        self.stride = stride\n\n    def forward(self, x, identity=None):\n        \"\"\"Forward function.\"\"\"\n\n        if identity is None:\n            identity = x\n        out = self.conv1(x)\n        out = self.norm1(out)\n        out = self.relu(out)\n        out = self.conv2(out)\n        out = self.norm2(out)\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Root(BaseModule):\n    \"\"\"Root in DLANet.\n\n    Args:\n        in_channels (int): Input feature channel.\n        out_channels (int): Output feature channel.\n        norm_cfg (dict): Dictionary to construct and config\n            norm layer.\n        conv_cfg (dict): Dictionary to construct and config\n            conv layer.\n        kernel_size (int): Size of convolution kernel.\n        add_identity (bool): Whether to add identity in 
root.\n        init_cfg (dict, optional): Initialization config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 norm_cfg,\n                 conv_cfg,\n                 kernel_size,\n                 add_identity,\n                 init_cfg=None):\n        super(Root, self).__init__(init_cfg)\n        self.conv = build_conv_layer(\n            conv_cfg,\n            in_channels,\n            out_channels,\n            1,\n            stride=1,\n            padding=(kernel_size - 1) // 2,\n            bias=False)\n        self.norm = dla_build_norm_layer(norm_cfg, out_channels)[1]\n        self.relu = nn.ReLU(inplace=True)\n        self.add_identity = add_identity\n\n    def forward(self, feat_list):\n        \"\"\"Forward function.\n\n        Args:\n            feat_list (list[torch.Tensor]): Output features from\n                multiple layers.\n        \"\"\"\n        children = feat_list\n        x = self.conv(torch.cat(feat_list, 1))\n        x = self.norm(x)\n        if self.add_identity:\n            x += children[0]\n        x = self.relu(x)\n\n        return x\n\n\nclass Tree(BaseModule):\n    \"\"\"Tree in DLANet.\n\n    Args:\n        levels (int): The level of the tree.\n        block (nn.Module): The block module in tree.\n        in_channels: Input feature channel.\n        out_channels: Output feature channel.\n        norm_cfg (dict): Dictionary to construct and config\n            norm layer.\n        conv_cfg (dict): Dictionary to construct and config\n            conv layer.\n        stride (int, optional): Convolution stride.\n            Default: 1.\n        level_root (bool, optional): whether belongs to the\n            root layer.\n        root_dim (int, optional): Root input feature channel.\n        root_kernel_size (int, optional): Size of root\n            convolution kernel. Default: 1.\n        dilation (int, optional): Conv dilation. Default: 1.\n        add_identity (bool, optional): Whether to add\n            identity in root. 
Default: False.\n        init_cfg (dict, optional): Initialization config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 levels,\n                 block,\n                 in_channels,\n                 out_channels,\n                 norm_cfg,\n                 conv_cfg,\n                 stride=1,\n                 level_root=False,\n                 root_dim=None,\n                 root_kernel_size=1,\n                 dilation=1,\n                 add_identity=False,\n                 init_cfg=None):\n        super(Tree, self).__init__(init_cfg)\n        if root_dim is None:\n            root_dim = 2 * out_channels\n        if level_root:\n            root_dim += in_channels\n        if levels == 1:\n            self.root = Root(root_dim, out_channels, norm_cfg, conv_cfg,\n                             root_kernel_size, add_identity)\n            self.tree1 = block(\n                in_channels,\n                out_channels,\n                norm_cfg,\n                conv_cfg,\n                stride,\n                dilation=dilation)\n            self.tree2 = block(\n                out_channels,\n                out_channels,\n                norm_cfg,\n                conv_cfg,\n                1,\n                dilation=dilation)\n        else:\n            self.tree1 = Tree(\n                levels - 1,\n                block,\n                in_channels,\n                out_channels,\n                norm_cfg,\n                conv_cfg,\n                stride,\n                root_dim=None,\n                root_kernel_size=root_kernel_size,\n                dilation=dilation,\n                add_identity=add_identity)\n            self.tree2 = Tree(\n                levels - 1,\n                block,\n                out_channels,\n                out_channels,\n                norm_cfg,\n                conv_cfg,\n                root_dim=root_dim + out_channels,\n                root_kernel_size=root_kernel_size,\n                dilation=dilation,\n                add_identity=add_identity)\n        self.level_root = level_root\n        self.root_dim = root_dim\n        self.downsample = None\n        self.project = None\n        self.levels = levels\n        if stride > 1:\n            self.downsample = nn.MaxPool2d(stride, stride=stride)\n        if in_channels != out_channels:\n            self.project = nn.Sequential(\n                build_conv_layer(\n                    conv_cfg,\n                    in_channels,\n                    out_channels,\n                    1,\n                    stride=1,\n                    bias=False),\n                dla_build_norm_layer(norm_cfg, out_channels)[1])\n\n    def forward(self, x, identity=None, children=None):\n        children = [] if children is None else children\n        bottom = self.downsample(x) if self.downsample else x\n        identity = self.project(bottom) if self.project else bottom\n        if self.level_root:\n            children.append(bottom)\n        x1 = self.tree1(x, identity)\n        if self.levels == 1:\n            x2 = self.tree2(x1)\n            feat_list = [x2, x1] + children\n            x = self.root(feat_list)\n        else:\n            children.append(x1)\n            x = self.tree2(x1, children=children)\n        return x\n\n\n@BACKBONES.register_module()\nclass DLANet(BaseModule):\n    r\"\"\"`DLA backbone <https://arxiv.org/abs/1707.06484>`_.\n\n    Args:\n        depth (int): Depth of DLA. 
Default: 34.\n        in_channels (int, optional): Number of input image channels.\n            Default: 3.\n        norm_cfg (dict, optional): Dictionary to construct and config\n            norm layer. Default: None.\n        conv_cfg (dict, optional): Dictionary to construct and config\n            conv layer. Default: None.\n        layer_with_level_root (list[bool], optional): Whether to apply\n            level_root in each DLA layer, this is only used for\n            tree levels. Default: (False, True, True, True).\n        with_identity_root (bool, optional): Whether to add identity\n            in root layer. Default: False.\n        pretrained (str, optional): model pretrained path.\n            Default: None.\n        init_cfg (dict or list[dict], optional): Initialization\n            config dict. Default: None\n    \"\"\"\n    arch_settings = {\n        34: (BasicBlock, (1, 1, 1, 2, 2, 1), (16, 32, 64, 128, 256, 512)),\n    }\n\n    def __init__(self,\n                 depth,\n                 in_channels=3,\n                 out_indices=(0, 1, 2, 3, 4, 5),\n                 frozen_stages=-1,\n                 norm_cfg=None,\n                 conv_cfg=None,\n                 layer_with_level_root=(False, True, True, True),\n                 with_identity_root=False,\n                 pretrained=None,\n                 init_cfg=None):\n        super(DLANet, self).__init__(init_cfg)\n        if depth not in self.arch_settings:\n            raise KeyError(f'invalida depth {depth} for DLA')\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be setting at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is a deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n\n        block, levels, channels = self.arch_settings[depth]\n        self.channels = channels\n        self.num_levels = len(levels)\n        self.frozen_stages = frozen_stages\n        self.out_indices = out_indices\n        assert max(out_indices) < self.num_levels\n        self.base_layer = nn.Sequential(\n            build_conv_layer(\n                conv_cfg,\n                in_channels,\n                channels[0],\n                7,\n                stride=1,\n                padding=3,\n                bias=False),\n            dla_build_norm_layer(norm_cfg, channels[0])[1],\n            nn.ReLU(inplace=True))\n\n        # DLANet first uses two conv layers then uses several\n        # Tree layers\n        for i in range(2):\n            level_layer = self._make_conv_level(\n                channels[0],\n                channels[i],\n                levels[i],\n                norm_cfg,\n                conv_cfg,\n                stride=i + 1)\n            layer_name = f'level{i}'\n            self.add_module(layer_name, level_layer)\n\n        for i in range(2, self.num_levels):\n            dla_layer = Tree(\n                levels[i],\n                block,\n                channels[i - 1],\n                channels[i],\n                norm_cfg,\n                
conv_cfg,\n                2,\n                level_root=layer_with_level_root[i - 2],\n                add_identity=with_identity_root)\n            layer_name = f'level{i}'\n            self.add_module(layer_name, dla_layer)\n\n        self._freeze_stages()\n\n    def _make_conv_level(self,\n                         in_channels,\n                         out_channels,\n                         num_convs,\n                         norm_cfg,\n                         conv_cfg,\n                         stride=1,\n                         dilation=1):\n        \"\"\"Conv modules.\n\n        Args:\n            in_channels (int): Input feature channel.\n            out_channels (int): Output feature channel.\n            num_convs (int): Number of Conv module.\n            norm_cfg (dict): Dictionary to construct and config\n                norm layer.\n            conv_cfg (dict): Dictionary to construct and config\n                conv layer.\n            stride (int, optional): Conv stride. Default: 1.\n            dilation (int, optional): Conv dilation. Default: 1.\n        \"\"\"\n        modules = []\n        for i in range(num_convs):\n            modules.extend([\n                build_conv_layer(\n                    conv_cfg,\n                    in_channels,\n                    out_channels,\n                    3,\n                    stride=stride if i == 0 else 1,\n                    padding=dilation,\n                    bias=False,\n                    dilation=dilation),\n                dla_build_norm_layer(norm_cfg, out_channels)[1],\n                nn.ReLU(inplace=True)\n            ])\n            in_channels = out_channels\n        return nn.Sequential(*modules)\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            self.base_layer.eval()\n            for param in self.base_layer.parameters():\n                param.requires_grad = False\n\n            for i in range(2):\n                m = getattr(self, f'level{i}')\n                m.eval()\n                for param in m.parameters():\n                    param.requires_grad = False\n\n        for i in range(1, self.frozen_stages + 1):\n            m = getattr(self, f'level{i+1}')\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def forward(self, x):\n        outs = []\n        x = self.base_layer(x)\n        for i in range(self.num_levels):\n            x = getattr(self, 'level{}'.format(i))(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n"
  },
  {
    "path": "mmdet3d/models/backbones/load.py",
    "content": "# Copyright (c) Open-MMLab. All rights reserved.\nimport os.path as osp\nimport time\nfrom tempfile import TemporaryDirectory\n\nimport torch\nfrom torch.optim import Optimizer\n\nimport mmcv\nfrom mmcv.parallel import is_module_wrapper\nfrom mmcv.runner.checkpoint import weights_to_cpu, get_state_dict\n\ntry:\n    import apex\nexcept:\n    print('apex is not installed')\n\n\ndef save_checkpoint(model, filename, optimizer=None, meta=None):\n    \"\"\"Save checkpoint to file.\n    The checkpoint will have 4 fields: ``meta``, ``state_dict`` and\n    ``optimizer``, ``amp``. By default ``meta`` will contain version\n    and time info.\n    Args:\n        model (Module): Module whose params are to be saved.\n        filename (str): Checkpoint filename.\n        optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.\n        meta (dict, optional): Metadata to be saved in checkpoint.\n    \"\"\"\n    if meta is None:\n        meta = {}\n    elif not isinstance(meta, dict):\n        raise TypeError(f'meta must be a dict or None, but got {type(meta)}')\n    meta.update(mmcv_version=mmcv.__version__, time=time.asctime())\n\n    if is_module_wrapper(model):\n        model = model.module\n\n    if hasattr(model, 'CLASSES') and model.CLASSES is not None:\n        # save class name to the meta\n        meta.update(CLASSES=model.CLASSES)\n\n    checkpoint = {\n        'meta': meta,\n        'state_dict': weights_to_cpu(get_state_dict(model))\n    }\n    # save optimizer state dict in the checkpoint\n    if isinstance(optimizer, Optimizer):\n        checkpoint['optimizer'] = optimizer.state_dict()\n    elif isinstance(optimizer, dict):\n        checkpoint['optimizer'] = {}\n        for name, optim in optimizer.items():\n            checkpoint['optimizer'][name] = optim.state_dict()\n\n    # save amp state dict in the checkpoint\n    # checkpoint['amp'] = apex.amp.state_dict()\n\n    if filename.startswith('pavi://'):\n        try:\n            from pavi import modelcloud\n            from pavi.exception import NodeNotFoundError\n        except ImportError:\n            raise ImportError(\n                'Please install pavi to load checkpoint from modelcloud.')\n        model_path = filename[7:]\n        root = modelcloud.Folder()\n        model_dir, model_name = osp.split(model_path)\n        try:\n            model = modelcloud.get(model_dir)\n        except NodeNotFoundError:\n            model = root.create_training_model(model_dir)\n        with TemporaryDirectory() as tmp_dir:\n            checkpoint_file = osp.join(tmp_dir, model_name)\n            with open(checkpoint_file, 'wb') as f:\n                torch.save(checkpoint, f)\n                f.flush()\n            model.create_file(checkpoint_file, name=model_name)\n    else:\n        mmcv.mkdir_or_exist(osp.dirname(filename))\n        # immediately flush buffer\n        with open(filename, 'wb') as f:\n            torch.save(checkpoint, f)\n            f.flush()"
  },
  {
    "path": "mmdet3d/models/backbones/mink_resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Follow https://github.com/NVIDIA/MinkowskiEngine/blob/master/examples/resnet.py # noqa\n# and mmcv.cnn.ResNet\ntry:\n    import MinkowskiEngine as ME\n    from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck\nexcept ImportError:\n    # Please follow getting_started.md to install MinkowskiEngine.\n    # blocks are used in the static part of MinkResNet\n    BasicBlock, Bottleneck = None, None\n\nimport torch.nn as nn\n\nfrom mmdet3d.models.builder import BACKBONES\n\n\n@BACKBONES.register_module()\nclass MinkResNet(nn.Module):\n    r\"\"\"Minkowski ResNet backbone. See `4D Spatio-Temporal ConvNets\n    <https://arxiv.org/abs/1904.08755>`_ for more details.\n\n    Args:\n        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.\n        in_channels (ont): Number of input channels, 3 for RGB.\n        num_stages (int, optional): Resnet stages. Default: 4.\n        pool (bool, optional): Add max pooling after first conv if True.\n            Default: True.\n    \"\"\"\n    arch_settings = {\n        18: (BasicBlock, (2, 2, 2, 2)),\n        34: (BasicBlock, (3, 4, 6, 3)),\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self, depth, in_channels, num_stages=4, pool=True):\n        super(MinkResNet, self).__init__()\n        if depth not in self.arch_settings:\n            raise KeyError(f'invalid depth {depth} for resnet')\n        assert 4 >= num_stages >= 1\n        block, stage_blocks = self.arch_settings[depth]\n        stage_blocks = stage_blocks[:num_stages]\n        self.num_stages = num_stages\n        self.pool = pool\n\n        self.inplanes = 64\n        self.conv1 = ME.MinkowskiConvolution(\n            in_channels, self.inplanes, kernel_size=3, stride=2, dimension=3)\n        # May be BatchNorm is better, but we follow original implementation.\n        self.norm1 = ME.MinkowskiInstanceNorm(self.inplanes)\n        self.relu = ME.MinkowskiReLU(inplace=True)\n        if self.pool:\n            self.maxpool = ME.MinkowskiMaxPooling(\n                kernel_size=2, stride=2, dimension=3)\n\n        for i, num_blocks in enumerate(stage_blocks):\n            setattr(\n                self, f'layer{i + 1}',\n                self._make_layer(block, 64 * 2**i, stage_blocks[i], stride=2))\n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, ME.MinkowskiConvolution):\n                ME.utils.kaiming_normal_(\n                    m.kernel, mode='fan_out', nonlinearity='relu')\n\n            if isinstance(m, ME.MinkowskiBatchNorm):\n                nn.init.constant_(m.bn.weight, 1)\n                nn.init.constant_(m.bn.bias, 0)\n\n    def _make_layer(self, block, planes, blocks, stride):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                ME.MinkowskiConvolution(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    dimension=3),\n                ME.MinkowskiBatchNorm(planes * block.expansion))\n        layers = []\n        layers.append(\n            block(\n                self.inplanes,\n                planes,\n                stride=stride,\n                downsample=downsample,\n                dimension=3))\n        self.inplanes 
= planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes, stride=1, dimension=3))\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        \"\"\"Forward pass of ResNet.\n\n        Args:\n            x (ME.SparseTensor): Input sparse tensor.\n\n        Returns:\n            list[ME.SparseTensor]: Output sparse tensors.\n        \"\"\"\n        x = self.conv1(x)\n        x = self.norm1(x)\n        x = self.relu(x)\n        if self.pool:\n            x = self.maxpool(x)\n        outs = []\n        for i in range(self.num_stages):\n            x = getattr(self, f'layer{i + 1}')(x)\n            outs.append(x)\n        return outs\n"
  },
  {
    "path": "mmdet3d/models/backbones/multi_backbone.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport warnings\n\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16\nfrom torch import nn as nn\n\nfrom ..builder import BACKBONES, build_backbone\n\n\n@BACKBONES.register_module()\nclass MultiBackbone(BaseModule):\n    \"\"\"MultiBackbone with different configs.\n\n    Args:\n        num_streams (int): The number of backbones.\n        backbones (list or dict): A list of backbone configs.\n        aggregation_mlp_channels (list[int]): Specify the mlp layers\n            for feature aggregation.\n        conv_cfg (dict): Config dict of convolutional layers.\n        norm_cfg (dict): Config dict of normalization layers.\n        act_cfg (dict): Config dict of activation layers.\n        suffixes (list): A list of suffixes to rename the return dict\n            for each backbone.\n    \"\"\"\n\n    def __init__(self,\n                 num_streams,\n                 backbones,\n                 aggregation_mlp_channels=None,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),\n                 act_cfg=dict(type='ReLU'),\n                 suffixes=('net0', 'net1'),\n                 init_cfg=None,\n                 pretrained=None,\n                 **kwargs):\n        super().__init__(init_cfg=init_cfg)\n        assert isinstance(backbones, dict) or isinstance(backbones, list)\n        if isinstance(backbones, dict):\n            backbones_list = []\n            for ind in range(num_streams):\n                backbones_list.append(copy.deepcopy(backbones))\n            backbones = backbones_list\n\n        assert len(backbones) == num_streams\n        assert len(suffixes) == num_streams\n\n        self.backbone_list = nn.ModuleList()\n        # Rename the ret_dict with different suffixs.\n        self.suffixes = suffixes\n\n        out_channels = 0\n\n        for backbone_cfg in backbones:\n            out_channels += backbone_cfg['fp_channels'][-1][-1]\n            self.backbone_list.append(build_backbone(backbone_cfg))\n\n        # Feature aggregation layers\n        if aggregation_mlp_channels is None:\n            aggregation_mlp_channels = [\n                out_channels, out_channels // 2,\n                out_channels // len(self.backbone_list)\n            ]\n        else:\n            aggregation_mlp_channels.insert(0, out_channels)\n\n        self.aggregation_layers = nn.Sequential()\n        for i in range(len(aggregation_mlp_channels) - 1):\n            self.aggregation_layers.add_module(\n                f'layer{i}',\n                ConvModule(\n                    aggregation_mlp_channels[i],\n                    aggregation_mlp_channels[i + 1],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    bias=True,\n                    inplace=True))\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be setting at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is a deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n\n    @auto_fp16()\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): 
point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Outputs from multiple backbones.\n\n                - fp_xyz[suffix] (list[torch.Tensor]): The coordinates of\n                  each fp features.\n                - fp_features[suffix] (list[torch.Tensor]): The features\n                  from each Feature Propagate Layers.\n                - fp_indices[suffix] (list[torch.Tensor]): Indices of the\n                  input points.\n                - hd_feature (torch.Tensor): The aggregation feature\n                  from multiple backbones.\n        \"\"\"\n        ret = {}\n        fp_features = []\n        for ind in range(len(self.backbone_list)):\n            cur_ret = self.backbone_list[ind](points)\n            cur_suffix = self.suffixes[ind]\n            fp_features.append(cur_ret['fp_features'][-1])\n            if cur_suffix != '':\n                for k in cur_ret.keys():\n                    cur_ret[k + '_' + cur_suffix] = cur_ret.pop(k)\n            ret.update(cur_ret)\n\n        # Combine the features here\n        hd_feature = torch.cat(fp_features, dim=1)\n        hd_feature = self.aggregation_layers(hd_feature)\n        ret['hd_feature'] = hd_feature\n        return ret\n"
  },
  {
    "path": "mmdet3d/models/backbones/nostem_regnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.backbones import RegNet\nfrom ..builder import BACKBONES\n\n\n@BACKBONES.register_module()\nclass NoStemRegNet(RegNet):\n    \"\"\"RegNet backbone without Stem for 3D detection.\n\n    More details can be found in `paper <https://arxiv.org/abs/2003.13678>`_ .\n\n    Args:\n        arch (dict): The parameter of RegNets.\n            - w0 (int): Initial width.\n            - wa (float): Slope of width.\n            - wm (float): Quantization parameter to quantize the width.\n            - depth (int): Depth of the backbone.\n            - group_w (int): Width of group.\n            - bot_mul (float): Bottleneck ratio, i.e. expansion of bottleneck.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        base_channels (int): Base channels after stem layer.\n        in_channels (int): Number of input image channels. Normally 3.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        frozen_stages (int): Stages to be frozen (all param fixed). -1 means\n            not freezing any parameters.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): Whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n\n    Example:\n        >>> from mmdet3d.models import NoStemRegNet\n        >>> import torch\n        >>> self = NoStemRegNet(\n                arch=dict(\n                    w0=88,\n                    wa=26.31,\n                    wm=2.25,\n                    group_w=48,\n                    depth=25,\n                    bot_mul=1.0))\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 64, 16, 16)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     print(tuple(level_out.shape))\n        (1, 96, 8, 8)\n        (1, 192, 4, 4)\n        (1, 432, 2, 2)\n        (1, 1008, 1, 1)\n    \"\"\"\n\n    def __init__(self, arch, init_cfg=None, **kwargs):\n        super(NoStemRegNet, self).__init__(arch, init_cfg=init_cfg, **kwargs)\n\n    def _make_stem_layer(self, in_channels, base_channels):\n        \"\"\"Override the original function that do not initialize a stem layer\n        since 3D detector's voxel encoder works like a stem layer.\"\"\"\n        return\n\n    def forward(self, x):\n        \"\"\"Forward function of backbone.\n\n        Args:\n            x (torch.Tensor): Features in shape (N, C, H, W).\n\n        Returns:\n            tuple[torch.Tensor]: Multi-scale features.\n        \"\"\"\n        outs = []\n        for i, layer_name in enumerate(self.res_layers):\n            res_layer = getattr(self, layer_name)\n            x = res_layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n"
  },
  {
    "path": "mmdet3d/models/backbones/pointnet2_sa_msg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import build_sa_module\nfrom ..builder import BACKBONES\nfrom .base_pointnet import BasePointNet\n\n\n@BACKBONES.register_module()\nclass PointNet2SAMSG(BasePointNet):\n    \"\"\"PointNet2 with Multi-scale grouping.\n\n    Args:\n        in_channels (int): Input channels of point cloud.\n        num_points (tuple[int]): The number of points which each SA\n            module samples.\n        radii (tuple[float]): Sampling radii of each SA module.\n        num_samples (tuple[int]): The number of samples for ball\n            query in each SA module.\n        sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.\n        aggregation_channels (tuple[int]): Out channels of aggregation\n            multi-scale grouping features.\n        fps_mods (tuple[int]): Mod of FPS for each SA module.\n        fps_sample_range_lists (tuple[tuple[int]]): The number of sampling\n            points which each SA module samples.\n        dilated_group (tuple[bool]): Whether to use dilated ball query for\n        out_indices (Sequence[int]): Output from which stages.\n        norm_cfg (dict): Config of normalization layer.\n        sa_cfg (dict): Config of set abstraction module, which may contain\n            the following keys and values:\n\n            - pool_mod (str): Pool method ('max' or 'avg') for SA modules.\n            - use_xyz (bool): Whether to use xyz as a part of features.\n            - normalize_xyz (bool): Whether to normalize xyz with radii in\n              each SA module.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_points=(2048, 1024, 512, 256),\n                 radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n                 num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),\n                 sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),\n                              ((64, 64, 128), (64, 64, 128), (64, 96, 128)),\n                              ((128, 128, 256), (128, 192, 256), (128, 256,\n                                                                  256))),\n                 aggregation_channels=(64, 128, 256),\n                 fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n                 fps_sample_range_lists=((-1), (-1), (512, -1)),\n                 dilated_group=(True, True, True),\n                 out_indices=(2, ),\n                 norm_cfg=dict(type='BN2d'),\n                 sa_cfg=dict(\n                     type='PointSAModuleMSG',\n                     pool_mod='max',\n                     use_xyz=True,\n                     normalize_xyz=False),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.num_sa = len(sa_channels)\n        self.out_indices = out_indices\n        assert max(out_indices) < self.num_sa\n        assert len(num_points) == len(radii) == len(num_samples) == len(\n            sa_channels)\n        if aggregation_channels is not None:\n            assert len(sa_channels) == len(aggregation_channels)\n        else:\n            aggregation_channels = [None] * len(sa_channels)\n\n        self.SA_modules = nn.ModuleList()\n        self.aggregation_mlps = nn.ModuleList()\n        sa_in_channel = in_channels - 3  # number of channels without xyz\n        skip_channel_list = [sa_in_channel]\n\n        for sa_index in 
range(self.num_sa):\n            cur_sa_mlps = list(sa_channels[sa_index])\n            sa_out_channel = 0\n            for radius_index in range(len(radii[sa_index])):\n                cur_sa_mlps[radius_index] = [sa_in_channel] + list(\n                    cur_sa_mlps[radius_index])\n                sa_out_channel += cur_sa_mlps[radius_index][-1]\n\n            if isinstance(fps_mods[sa_index], tuple):\n                cur_fps_mod = list(fps_mods[sa_index])\n            else:\n                cur_fps_mod = list([fps_mods[sa_index]])\n\n            if isinstance(fps_sample_range_lists[sa_index], tuple):\n                cur_fps_sample_range_list = list(\n                    fps_sample_range_lists[sa_index])\n            else:\n                cur_fps_sample_range_list = list(\n                    [fps_sample_range_lists[sa_index]])\n\n            self.SA_modules.append(\n                build_sa_module(\n                    num_point=num_points[sa_index],\n                    radii=radii[sa_index],\n                    sample_nums=num_samples[sa_index],\n                    mlp_channels=cur_sa_mlps,\n                    fps_mod=cur_fps_mod,\n                    fps_sample_range_list=cur_fps_sample_range_list,\n                    dilated_group=dilated_group[sa_index],\n                    norm_cfg=norm_cfg,\n                    cfg=sa_cfg,\n                    bias=True))\n            skip_channel_list.append(sa_out_channel)\n\n            cur_aggregation_channel = aggregation_channels[sa_index]\n            if cur_aggregation_channel is None:\n                self.aggregation_mlps.append(None)\n                sa_in_channel = sa_out_channel\n            else:\n                self.aggregation_mlps.append(\n                    ConvModule(\n                        sa_out_channel,\n                        cur_aggregation_channel,\n                        conv_cfg=dict(type='Conv1d'),\n                        norm_cfg=dict(type='BN1d'),\n                        kernel_size=1,\n                        bias=True))\n                sa_in_channel = cur_aggregation_channel\n\n    @auto_fp16(apply_to=('points', ))\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            dict[str, torch.Tensor]: Outputs of the last SA module.\n\n                - sa_xyz (torch.Tensor): The coordinates of sa features.\n                - sa_features (torch.Tensor): The features from the\n                    last Set Aggregation Layers.\n                - sa_indices (torch.Tensor): Indices of the\n                    input points.\n        \"\"\"\n        xyz, features = self._split_point_feats(points)\n\n        batch, num_points = xyz.shape[:2]\n        indices = xyz.new_tensor(range(num_points)).unsqueeze(0).repeat(\n            batch, 1).long()\n\n        sa_xyz = [xyz]\n        sa_features = [features]\n        sa_indices = [indices]\n\n        out_sa_xyz = [xyz]\n        out_sa_features = [features]\n        out_sa_indices = [indices]\n\n        for i in range(self.num_sa):\n            cur_xyz, cur_features, cur_indices = self.SA_modules[i](\n                sa_xyz[i], sa_features[i])\n            if self.aggregation_mlps[i] is not None:\n                cur_features = self.aggregation_mlps[i](cur_features)\n            sa_xyz.append(cur_xyz)\n            sa_features.append(cur_features)\n            sa_indices.append(\n             
   torch.gather(sa_indices[-1], 1, cur_indices.long()))\n            if i in self.out_indices:\n                out_sa_xyz.append(sa_xyz[-1])\n                out_sa_features.append(sa_features[-1])\n                out_sa_indices.append(sa_indices[-1])\n\n        return dict(\n            sa_xyz=out_sa_xyz,\n            sa_features=out_sa_features,\n            sa_indices=out_sa_indices)\n"
  },
  {
    "path": "mmdet3d/models/backbones/pointnet2_sa_ssg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import PointFPModule, build_sa_module\nfrom ..builder import BACKBONES\nfrom .base_pointnet import BasePointNet\n\n\n@BACKBONES.register_module()\nclass PointNet2SASSG(BasePointNet):\n    \"\"\"PointNet2 with Single-scale grouping.\n\n    Args:\n        in_channels (int): Input channels of point cloud.\n        num_points (tuple[int]): The number of points which each SA\n            module samples.\n        radius (tuple[float]): Sampling radii of each SA module.\n        num_samples (tuple[int]): The number of samples for ball\n            query in each SA module.\n        sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.\n        fp_channels (tuple[tuple[int]]): Out channels of each mlp in FP module.\n        norm_cfg (dict): Config of normalization layer.\n        sa_cfg (dict): Config of set abstraction module, which may contain\n            the following keys and values:\n\n            - pool_mod (str): Pool method ('max' or 'avg') for SA modules.\n            - use_xyz (bool): Whether to use xyz as a part of features.\n            - normalize_xyz (bool): Whether to normalize xyz with radii in\n              each SA module.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_points=(2048, 1024, 512, 256),\n                 radius=(0.2, 0.4, 0.8, 1.2),\n                 num_samples=(64, 32, 16, 16),\n                 sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                              (128, 128, 256)),\n                 fp_channels=((256, 256), (256, 256)),\n                 norm_cfg=dict(type='BN2d'),\n                 sa_cfg=dict(\n                     type='PointSAModule',\n                     pool_mod='max',\n                     use_xyz=True,\n                     normalize_xyz=True),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.num_sa = len(sa_channels)\n        self.num_fp = len(fp_channels)\n\n        assert len(num_points) == len(radius) == len(num_samples) == len(\n            sa_channels)\n        assert len(sa_channels) >= len(fp_channels)\n\n        self.SA_modules = nn.ModuleList()\n        sa_in_channel = in_channels - 3  # number of channels without xyz\n        skip_channel_list = [sa_in_channel]\n\n        for sa_index in range(self.num_sa):\n            cur_sa_mlps = list(sa_channels[sa_index])\n            cur_sa_mlps = [sa_in_channel] + cur_sa_mlps\n            sa_out_channel = cur_sa_mlps[-1]\n\n            self.SA_modules.append(\n                build_sa_module(\n                    num_point=num_points[sa_index],\n                    radius=radius[sa_index],\n                    num_sample=num_samples[sa_index],\n                    mlp_channels=cur_sa_mlps,\n                    norm_cfg=norm_cfg,\n                    cfg=sa_cfg))\n            skip_channel_list.append(sa_out_channel)\n            sa_in_channel = sa_out_channel\n\n        self.FP_modules = nn.ModuleList()\n\n        fp_source_channel = skip_channel_list.pop()\n        fp_target_channel = skip_channel_list.pop()\n        for fp_index in range(len(fp_channels)):\n            cur_fp_mlps = list(fp_channels[fp_index])\n            cur_fp_mlps = [fp_source_channel + fp_target_channel] + cur_fp_mlps\n            self.FP_modules.append(PointFPModule(mlp_channels=cur_fp_mlps))\n            if fp_index != 
len(fp_channels) - 1:\n                fp_source_channel = cur_fp_mlps[-1]\n                fp_target_channel = skip_channel_list.pop()\n\n    @auto_fp16(apply_to=('points', ))\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Outputs after SA and FP modules.\n\n                - fp_xyz (list[torch.Tensor]): The coordinates of\n                    each fp features.\n                - fp_features (list[torch.Tensor]): The features\n                    from each Feature Propagate Layers.\n                - fp_indices (list[torch.Tensor]): Indices of the\n                    input points.\n        \"\"\"\n        xyz, features = self._split_point_feats(points)\n\n        batch, num_points = xyz.shape[:2]\n        indices = xyz.new_tensor(range(num_points)).unsqueeze(0).repeat(\n            batch, 1).long()\n\n        sa_xyz = [xyz]\n        sa_features = [features]\n        sa_indices = [indices]\n\n        for i in range(self.num_sa):\n            cur_xyz, cur_features, cur_indices = self.SA_modules[i](\n                sa_xyz[i], sa_features[i])\n            sa_xyz.append(cur_xyz)\n            sa_features.append(cur_features)\n            sa_indices.append(\n                torch.gather(sa_indices[-1], 1, cur_indices.long()))\n\n        fp_xyz = [sa_xyz[-1]]\n        fp_features = [sa_features[-1]]\n        fp_indices = [sa_indices[-1]]\n\n        for i in range(self.num_fp):\n            fp_features.append(self.FP_modules[i](\n                sa_xyz[self.num_sa - i - 1], sa_xyz[self.num_sa - i],\n                sa_features[self.num_sa - i - 1], fp_features[-1]))\n            fp_xyz.append(sa_xyz[self.num_sa - i - 1])\n            fp_indices.append(sa_indices[self.num_sa - i - 1])\n\n        ret = dict(\n            fp_xyz=fp_xyz,\n            fp_features=fp_features,\n            fp_indices=fp_indices,\n            sa_xyz=sa_xyz,\n            sa_features=sa_features,\n            sa_indices=sa_indices)\n        return ret\n"
  },
  {
    "path": "mmdet3d/models/backbones/resnet.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\n\nimport torch.utils.checkpoint as checkpoint\nfrom torch import nn\n\nfrom mmdet.models import BACKBONES\nfrom mmdet.models.backbones.resnet import BasicBlock, Bottleneck\n\n\n@BACKBONES.register_module()\nclass CustomResNet(nn.Module):\n\n    def __init__(\n            self,\n            numC_input,\n            num_layer=[2, 2, 2],\n            num_channels=None,\n            stride=[2, 2, 2],\n            backbone_output_ids=None,\n            norm_cfg=dict(type='BN'),\n            with_cp=False,\n            block_type='Basic',\n    ):\n        super(CustomResNet, self).__init__()\n        # build backbone\n        assert len(num_layer) == len(stride)\n        num_channels = [numC_input*2**(i+1) for i in range(len(num_layer))] \\\n            if num_channels is None else num_channels\n        self.backbone_output_ids = range(len(num_layer)) \\\n            if backbone_output_ids is None else backbone_output_ids\n        layers = []\n        if block_type == 'BottleNeck':\n            curr_numC = numC_input\n            for i in range(len(num_layer)):\n                layer = [\n                    Bottleneck(\n                        curr_numC,\n                        num_channels[i] // 4,\n                        stride=stride[i],\n                        downsample=nn.Conv2d(curr_numC, num_channels[i], 3,\n                                             stride[i], 1),\n                        norm_cfg=norm_cfg)\n                ]\n                curr_numC = num_channels[i]\n                layer.extend([\n                    Bottleneck(curr_numC, curr_numC // 4, norm_cfg=norm_cfg)\n                    for _ in range(num_layer[i] - 1)\n                ])\n                layers.append(nn.Sequential(*layer))\n        elif block_type == 'Basic':\n            curr_numC = numC_input\n            for i in range(len(num_layer)):\n                layer = [\n                    BasicBlock(\n                        curr_numC,\n                        num_channels[i],\n                        stride=stride[i],\n                        downsample=nn.Conv2d(curr_numC, num_channels[i], 3,\n                                             stride[i], 1),\n                        norm_cfg=norm_cfg)\n                ]\n                curr_numC = num_channels[i]\n                layer.extend([\n                    BasicBlock(curr_numC, curr_numC, norm_cfg=norm_cfg)\n                    for _ in range(num_layer[i] - 1)\n                ])\n                layers.append(nn.Sequential(*layer))\n        else:\n            assert False\n        self.layers = nn.Sequential(*layers)\n\n        self.with_cp = with_cp\n\n    def forward(self, x):\n        feats = []\n        x_tmp = x\n        for lid, layer in enumerate(self.layers):\n            if self.with_cp:\n                x_tmp = checkpoint.checkpoint(layer, x_tmp)\n            else:\n                x_tmp = layer(x_tmp)\n            if lid in self.backbone_output_ids:\n                feats.append(x_tmp)\n        return feats\n"
  },
  {
    "path": "mmdet3d/models/backbones/second.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom ..builder import BACKBONES\n\n\n@BACKBONES.register_module()\nclass SECOND(BaseModule):\n    \"\"\"Backbone network for SECOND/PointPillars/PartA2/MVXNet.\n\n    Args:\n        in_channels (int): Input channels.\n        out_channels (list[int]): Output channels for multi-scale feature maps.\n        layer_nums (list[int]): Number of layers in each stage.\n        layer_strides (list[int]): Strides of each stage.\n        norm_cfg (dict): Config dict of normalization layers.\n        conv_cfg (dict): Config dict of convolutional layers.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=128,\n                 out_channels=[128, 128, 256],\n                 layer_nums=[3, 5, 5],\n                 layer_strides=[2, 2, 2],\n                 norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n                 conv_cfg=dict(type='Conv2d', bias=False),\n                 init_cfg=None,\n                 pretrained=None):\n        super(SECOND, self).__init__(init_cfg=init_cfg)\n        assert len(layer_strides) == len(layer_nums)\n        assert len(out_channels) == len(layer_nums)\n\n        in_filters = [in_channels, *out_channels[:-1]]\n        # note that when stride > 1, conv2d with same padding isn't\n        # equal to pad-conv2d. we should use pad-conv2d.\n        blocks = []\n        for i, layer_num in enumerate(layer_nums):\n            block = [\n                build_conv_layer(\n                    conv_cfg,\n                    in_filters[i],\n                    out_channels[i],\n                    3,\n                    stride=layer_strides[i],\n                    padding=1),\n                build_norm_layer(norm_cfg, out_channels[i])[1],\n                nn.ReLU(inplace=True),\n            ]\n            for j in range(layer_num):\n                block.append(\n                    build_conv_layer(\n                        conv_cfg,\n                        out_channels[i],\n                        out_channels[i],\n                        3,\n                        padding=1))\n                block.append(build_norm_layer(norm_cfg, out_channels[i])[1])\n                block.append(nn.ReLU(inplace=True))\n\n            block = nn.Sequential(*block)\n            blocks.append(block)\n\n        self.blocks = nn.ModuleList(blocks)\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be setting at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is a deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        else:\n            self.init_cfg = dict(type='Kaiming', layer='Conv2d')\n\n    def forward(self, x):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): Input with shape (N, C, H, W).\n\n        Returns:\n            tuple[torch.Tensor]: Multi-scale features.\n        \"\"\"\n        outs = []\n        for i in range(len(self.blocks)):\n            x = self.blocks[i](x)\n            outs.append(x)\n        return tuple(outs)\n"
  },
  {
    "path": "mmdet3d/models/backbones/swin.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom copy import deepcopy\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import build_norm_layer, trunc_normal_init, build_conv_layer\nfrom mmcv.cnn.bricks.transformer import FFN, build_dropout\nfrom mmcv.cnn.utils.weight_init import constant_init\nfrom mmcv.runner import _load_checkpoint\nfrom mmcv.runner.base_module import BaseModule, ModuleList\nfrom torch.nn.modules.linear import Linear\nfrom torch.nn.modules.normalization import LayerNorm\nimport torch.utils.checkpoint as checkpoint\n\nfrom mmseg.ops import resize\nfrom ...utils import get_root_logger\nfrom ..builder import BACKBONES\nfrom mmcv.cnn.bricks.registry import ATTENTION\nfrom torch.nn.modules.utils import _pair as to_2tuple\nfrom collections import OrderedDict\n\n\ndef swin_convert(ckpt):\n    new_ckpt = OrderedDict()\n\n    def correct_unfold_reduction_order(x):\n        out_channel, in_channel = x.shape\n        x = x.reshape(out_channel, 4, in_channel // 4)\n        x = x[:, [0, 2, 1, 3], :].transpose(1,\n                                            2).reshape(out_channel, in_channel)\n        return x\n\n    def correct_unfold_norm_order(x):\n        in_channel = x.shape[0]\n        x = x.reshape(4, in_channel // 4)\n        x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel)\n        return x\n\n    for k, v in ckpt.items():\n        if k.startswith('head'):\n            continue\n        elif k.startswith('layers'):\n            new_v = v\n            if 'attn.' in k:\n                new_k = k.replace('attn.', 'attn.w_msa.')\n            elif 'mlp.' in k:\n                if 'mlp.fc1.' in k:\n                    new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.')\n                elif 'mlp.fc2.' in k:\n                    new_k = k.replace('mlp.fc2.', 'ffn.layers.1.')\n                else:\n                    new_k = k.replace('mlp.', 'ffn.')\n            elif 'downsample' in k:\n                new_k = k\n                if 'reduction.' in k:\n                    new_v = correct_unfold_reduction_order(v)\n                elif 'norm.' in k:\n                    new_v = correct_unfold_norm_order(v)\n            else:\n                new_k = k\n            new_k = new_k.replace('layers', 'stages', 1)\n        elif k.startswith('patch_embed'):\n            new_v = v\n            if 'proj' in k:\n                new_k = k.replace('proj', 'projection')\n            else:\n                new_k = k\n        else:\n            new_v = v\n            new_k = k\n\n        new_ckpt[new_k] = new_v\n\n    return new_ckpt\n\n# Modified from Pytorch-Image-Models\nclass PatchEmbed(BaseModule):\n    \"\"\"Image to Patch Embedding V2.\n\n    We use a conv layer to implement PatchEmbed.\n    Args:\n        in_channels (int): The num of input channels. Default: 3\n        embed_dims (int): The dimensions of embedding. Default: 768\n        conv_type (dict, optional): The config dict for conv layers type\n            selection. Default: None.\n        kernel_size (int): The kernel_size of embedding conv. Default: 16.\n        stride (int): The slide stride of embedding conv.\n            Default: None (Default to be equal with kernel_size).\n        padding (int): The padding length of embedding conv. Default: 0.\n        dilation (int): The dilation rate of embedding conv. Default: 1.\n        pad_to_patch_size (bool, optional): Whether to pad feature map shape\n            to multiple patch size. 
Default: True.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n        init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=3,\n                 embed_dims=768,\n                 conv_type=None,\n                 kernel_size=16,\n                 stride=16,\n                 padding=0,\n                 dilation=1,\n                 pad_to_patch_size=True,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super(PatchEmbed, self).__init__()\n\n        self.embed_dims = embed_dims\n        self.init_cfg = init_cfg\n\n        if stride is None:\n            stride = kernel_size\n\n        self.pad_to_patch_size = pad_to_patch_size\n\n        # The default setting of patch size is equal to kernel size.\n        patch_size = kernel_size\n        if isinstance(patch_size, int):\n            patch_size = to_2tuple(patch_size)\n        elif isinstance(patch_size, tuple):\n            if len(patch_size) == 1:\n                patch_size = to_2tuple(patch_size[0])\n            assert len(patch_size) == 2, \\\n                f'The size of patch should have length 1 or 2, ' \\\n                f'but got {len(patch_size)}'\n\n        self.patch_size = patch_size\n\n        # Use conv layer to embed\n        conv_type = conv_type or 'Conv2d'\n        self.projection = build_conv_layer(\n            dict(type=conv_type),\n            in_channels=in_channels,\n            out_channels=embed_dims,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation)\n\n        if norm_cfg is not None:\n            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]\n        else:\n            self.norm = None\n\n    def forward(self, x):\n        H, W = x.shape[2], x.shape[3]\n\n        # TODO: Process overlapping op\n        if self.pad_to_patch_size:\n            # Modify H, W to multiple of patch size.\n            if H % self.patch_size[0] != 0:\n                x = F.pad(\n                    x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0]))\n            if W % self.patch_size[1] != 0:\n                x = F.pad(\n                    x, (0, self.patch_size[1] - W % self.patch_size[1], 0, 0))\n\n        x = self.projection(x)\n        self.DH, self.DW = x.shape[2], x.shape[3]\n        x = x.flatten(2).transpose(1, 2)\n\n        if self.norm is not None:\n            x = self.norm(x)\n\n        return x\n\n\n\nclass PatchMerging(BaseModule):\n    \"\"\"Merge patch feature map.\n\n    This layer use nn.Unfold to group feature map by kernel_size, and use norm\n    and linear layer to embed grouped feature map.\n    Args:\n        in_channels (int): The num of input channels.\n        out_channels (int): The num of output channels.\n        stride (int | tuple): the stride of the sliding length in the\n            unfold layer. Defaults: 2. 
(Default to be equal with kernel_size).\n        bias (bool, optional): Whether to add bias in linear layer or not.\n            Defaults: False.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Defaults: dict(type='LN').\n        init_cfg (dict, optional): The extra config for initialization.\n            Defaults: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 stride=2,\n                 bias=False,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.stride = stride\n\n        self.sampler = nn.Unfold(\n            kernel_size=stride, dilation=1, padding=0, stride=stride)\n\n        sample_dim = stride**2 * in_channels\n\n        if norm_cfg is not None:\n            self.norm = build_norm_layer(norm_cfg, sample_dim)[1]\n        else:\n            self.norm = None\n\n        self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)\n\n    def forward(self, x, hw_shape):\n        \"\"\"\n        x: x.shape -> [B, H*W, C]\n        hw_shape: (H, W)\n        \"\"\"\n        B, L, C = x.shape\n        H, W = hw_shape\n        assert L == H * W, 'input feature has wrong size'\n\n        x = x.view(B, H, W, C).permute([0, 3, 1, 2])  # B, C, H, W\n\n        # stride is fixed to be equal to kernel_size.\n        if (H % self.stride != 0) or (W % self.stride != 0):\n            x = F.pad(x, (0, W % self.stride, 0, H % self.stride))\n\n        # Use nn.Unfold to merge patch. About 25% faster than original method,\n        # but need to modify pretrained model for compatibility\n        x = self.sampler(x)  # B, 4*C, H/2*W/2\n        x = x.transpose(1, 2)  # B, H/2*W/2, 4*C\n\n        x = self.norm(x) if self.norm else x\n        x = self.reduction(x)\n\n        down_hw_shape = (H + 1) // 2, (W + 1) // 2\n        return x, down_hw_shape\n\n\n@ATTENTION.register_module()\nclass WindowMSA(BaseModule):\n    \"\"\"Window based multi-head self-attention (W-MSA) module with relative\n    position bias.\n\n    Args:\n        embed_dims (int): Number of input channels.\n        window_size (tuple[int]): The height and width of the window.\n        num_heads (int): Number of attention heads.\n        qkv_bias (bool, optional):  If True, add a learnable bias to q, k, v.\n            Default: True.\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        attn_drop_rate (float, optional): Dropout ratio of attention weight.\n            Default: 0.0\n        proj_drop_rate (float, optional): Dropout ratio of output. 
Default: 0.0\n        init_cfg (dict | None, optional): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 window_size,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 attn_drop_rate=0.,\n                 proj_drop_rate=0.,\n                 init_cfg=None):\n\n        super().__init__()\n        self.embed_dims = embed_dims\n        self.window_size = window_size  # Wh, Ww\n        self.num_heads = num_heads\n        head_embed_dims = embed_dims // num_heads\n        self.scale = qk_scale or head_embed_dims**-0.5\n        self.init_cfg = init_cfg\n\n        # define a parameter table of relative position bias\n        self.relative_position_bias_table = nn.Parameter(\n            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1),\n                        num_heads))  # 2*Wh-1 * 2*Ww-1, nH\n\n        # About 2x faster than original impl\n        Wh, Ww = self.window_size\n        rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww)\n        rel_position_index = rel_index_coords + rel_index_coords.T\n        rel_position_index = rel_position_index.flip(1).contiguous()\n        self.register_buffer('relative_position_index', rel_position_index)\n\n        self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias)\n        self.attn_drop = nn.Dropout(attn_drop_rate)\n        self.proj = nn.Linear(embed_dims, embed_dims)\n        self.proj_drop = nn.Dropout(proj_drop_rate)\n\n        self.softmax = nn.Softmax(dim=-1)\n\n    def init_weights(self):\n        trunc_normal_init(self.relative_position_bias_table, std=0.02)\n\n    def forward(self, x, mask=None):\n        \"\"\"\n        Args:\n\n            x (tensor): input features with shape of (num_windows*B, N, C)\n            mask (tensor | None, Optional): mask with shape of (num_windows,\n                Wh*Ww, Wh*Ww), value should be between (-inf, 0].\n        \"\"\"\n        B, N, C = x.shape\n        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,\n                                  C // self.num_heads).permute(2, 0, 3, 1, 4)\n        q, k, v = qkv[0], qkv[1], qkv[\n            2]  # make torchscript happy (cannot use tensor as tuple)\n\n        q = q * self.scale\n        attn = (q @ k.transpose(-2, -1))\n\n        relative_position_bias = self.relative_position_bias_table[\n            self.relative_position_index.view(-1)].view(\n                self.window_size[0] * self.window_size[1],\n                self.window_size[0] * self.window_size[1],\n                -1)  # Wh*Ww,Wh*Ww,nH\n        relative_position_bias = relative_position_bias.permute(\n            2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww\n        attn = attn + relative_position_bias.unsqueeze(0)\n\n        if mask is not None:\n            nW = mask.shape[0]\n            attn = attn.view(B // nW, nW, self.num_heads, N,\n                             N) + mask.unsqueeze(1).unsqueeze(0)\n            attn = attn.view(-1, self.num_heads, N, N)\n            attn = self.softmax(attn)\n        else:\n            attn = self.softmax(attn)\n\n        attn = self.attn_drop(attn)\n\n        x = (attn @ v).transpose(1, 2).reshape(B, N, C)\n        x = self.proj(x)\n        x = self.proj_drop(x)\n        return x\n\n    @staticmethod\n    def double_step_seq(step1, len1, step2, len2):\n        seq1 = torch.arange(0, step1 * len1, step1)\n        seq2 = torch.arange(0, step2 * len2, step2)\n       
 return (seq1[:, None] + seq2[None, :]).reshape(1, -1)\n\n\n@ATTENTION.register_module()\nclass ShiftWindowMSA(BaseModule):\n    \"\"\"Shift Window Multihead Self-Attention Module.\n\n    Args:\n        embed_dims (int): Number of input channels.\n        num_heads (int): Number of attention heads.\n        window_size (int): The height and width of the window.\n        shift_size (int, optional): The shift step of each window towards\n            right-bottom. If zero, act as regular window-msa. Defaults to 0.\n        qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.\n            Default: True\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Defaults: None.\n        attn_drop_rate (float, optional): Dropout ratio of attention weight.\n            Defaults: 0.\n        proj_drop_rate (float, optional): Dropout ratio of output.\n            Defaults: 0.\n        dropout_layer (dict, optional): The dropout_layer used before output.\n            Defaults: dict(type='DropPath', drop_prob=0.).\n        init_cfg (dict, optional): The extra config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 window_size,\n                 shift_size=0,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 attn_drop_rate=0,\n                 proj_drop_rate=0,\n                 dropout_layer=dict(type='DropPath', drop_prob=0.),\n                 init_cfg=None):\n        super().__init__(init_cfg)\n\n        self.window_size = window_size\n        self.shift_size = shift_size\n        assert 0 <= self.shift_size < self.window_size\n\n        self.w_msa = WindowMSA(\n            embed_dims=embed_dims,\n            num_heads=num_heads,\n            window_size=to_2tuple(window_size),\n            qkv_bias=qkv_bias,\n            qk_scale=qk_scale,\n            attn_drop_rate=attn_drop_rate,\n            proj_drop_rate=proj_drop_rate,\n            init_cfg=None)\n\n        self.drop = build_dropout(dropout_layer)\n\n    def forward(self, query, hw_shape):\n        B, L, C = query.shape\n        H, W = hw_shape\n        assert L == H * W, 'input feature has wrong size'\n        query = query.view(B, H, W, C)\n\n        # pad feature maps to multiples of window size\n        pad_r = (self.window_size - W % self.window_size) % self.window_size\n        pad_b = (self.window_size - H % self.window_size) % self.window_size\n        query = F.pad(query, (0, 0, 0, pad_r, 0, pad_b))\n        H_pad, W_pad = query.shape[1], query.shape[2]\n\n        # cyclic shift\n        if self.shift_size > 0:\n            shifted_query = torch.roll(\n                query,\n                shifts=(-self.shift_size, -self.shift_size),\n                dims=(1, 2))\n\n            # calculate attention mask for SW-MSA\n            img_mask = torch.zeros((1, H_pad, W_pad, 1),\n                                   device=query.device)  # 1 H W 1\n            h_slices = (slice(0, -self.window_size),\n                        slice(-self.window_size,\n                              -self.shift_size), slice(-self.shift_size, None))\n            w_slices = (slice(0, -self.window_size),\n                        slice(-self.window_size,\n                              -self.shift_size), slice(-self.shift_size, None))\n            cnt = 0\n            for h in h_slices:\n                for w in w_slices:\n                    
img_mask[:, h, w, :] = cnt\n                    cnt += 1\n\n            # nW, window_size, window_size, 1\n            mask_windows = self.window_partition(img_mask)\n            mask_windows = mask_windows.view(\n                -1, self.window_size * self.window_size)\n            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)\n            attn_mask = attn_mask.masked_fill(attn_mask != 0,\n                                              float(-100.0)).masked_fill(\n                                                  attn_mask == 0, float(0.0))\n        else:\n            shifted_query = query\n            attn_mask = None\n\n        # nW*B, window_size, window_size, C\n        query_windows = self.window_partition(shifted_query)\n        # nW*B, window_size*window_size, C\n        query_windows = query_windows.view(-1, self.window_size**2, C)\n\n        # W-MSA/SW-MSA (nW*B, window_size*window_size, C)\n        attn_windows = self.w_msa(query_windows, mask=attn_mask)\n\n        # merge windows\n        attn_windows = attn_windows.view(-1, self.window_size,\n                                         self.window_size, C)\n\n        # B H' W' C\n        shifted_x = self.window_reverse(attn_windows, H_pad, W_pad)\n        # reverse cyclic shift\n        if self.shift_size > 0:\n            x = torch.roll(\n                shifted_x,\n                shifts=(self.shift_size, self.shift_size),\n                dims=(1, 2))\n        else:\n            x = shifted_x\n\n        if pad_r > 0 or pad_b:\n            x = x[:, :H, :W, :].contiguous()\n\n        x = x.view(B, H * W, C)\n\n        x = self.drop(x)\n        return x\n\n    def window_reverse(self, windows, H, W):\n        \"\"\"\n        Args:\n            windows: (num_windows*B, window_size, window_size, C)\n            window_size (int): Window size\n            H (int): Height of image\n            W (int): Width of image\n        Returns:\n            x: (B, H, W, C)\n        \"\"\"\n        window_size = self.window_size\n        B = int(windows.shape[0] / (H * W / window_size / window_size))\n        x = windows.view(B, H // window_size, W // window_size, window_size,\n                         window_size, -1)\n        x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)\n        return x\n\n    def window_partition(self, x):\n        \"\"\"\n        Args:\n            x: (B, H, W, C)\n            window_size (int): window size\n        Returns:\n            windows: (num_windows*B, window_size, window_size, C)\n        \"\"\"\n        B, H, W, C = x.shape\n        window_size = self.window_size\n        x = x.view(B, H // window_size, window_size, W // window_size,\n                   window_size, C)\n        windows = x.permute(0, 1, 3, 2, 4, 5).contiguous()\n        windows = windows.view(-1, window_size, window_size, C)\n        return windows\n\n\nclass SwinBlock(BaseModule):\n    \"\"\"\"\n    Args:\n        embed_dims (int): The feature dimension.\n        num_heads (int): Parallel attention heads.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        window size (int, optional): The local window scale. Default: 7.\n        shift (bool): whether to shift window or not. Default False.\n        qkv_bias (int, optional): enable bias for qkv if True. Default: True.\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        drop_rate (float, optional): Dropout rate. 
Default: 0.\n        attn_drop_rate (float, optional): Attention dropout rate. Default: 0.\n        drop_path_rate (float, optional): Stochastic depth rate. Default: 0.2.\n        act_cfg (dict, optional): The config dict of activation function.\n            Default: dict(type='GELU').\n        norm_cfg (dict, optional): The config dict of normalization.\n            Default: dict(type='LN').\n        init_cfg (dict | list | None, optional): The init config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 feedforward_channels,\n                 window_size=7,\n                 shift=False,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None):\n\n        super(SwinBlock, self).__init__()\n\n        self.init_cfg = init_cfg\n\n        self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]\n        self.attn = ShiftWindowMSA(\n            embed_dims=embed_dims,\n            num_heads=num_heads,\n            window_size=window_size,\n            shift_size=window_size // 2 if shift else 0,\n            qkv_bias=qkv_bias,\n            qk_scale=qk_scale,\n            attn_drop_rate=attn_drop_rate,\n            proj_drop_rate=drop_rate,\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\n            init_cfg=None)\n\n        self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]\n        self.ffn = FFN(\n            embed_dims=embed_dims,\n            feedforward_channels=feedforward_channels,\n            num_fcs=2,\n            ffn_drop=drop_rate,\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\n            act_cfg=act_cfg,\n            add_identity=True,\n            init_cfg=None)\n\n    def forward(self, x, hw_shape):\n        identity = x\n        x = self.norm1(x)\n        x = self.attn(x, hw_shape)\n\n        x = x + identity\n\n        identity = x\n        x = self.norm2(x)\n        x = self.ffn(x, identity=identity)\n\n        return x\n\n\nclass SwinBlockSequence(BaseModule):\n    \"\"\"Implements one stage in Swin Transformer.\n\n    Args:\n        embed_dims (int): The feature dimension.\n        num_heads (int): Parallel attention heads.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        depth (int): The number of blocks in this stage.\n        window_size (int): The local window scale. Default: 7.\n        qkv_bias (bool): enable bias for qkv if True. Default: True.\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        drop_rate (float, optional): Dropout rate. Default: 0.\n        attn_drop_rate (float, optional): Attention dropout rate. Default: 0.\n        drop_path_rate (float, optional): Stochastic depth rate. Default: 0.2.\n        downsample (BaseModule | None, optional): The downsample operation\n            module. 
Default: None.\n        act_cfg (dict, optional): The config dict of activation function.\n            Default: dict(type='GELU').\n        norm_cfg (dict, optional): The config dict of normalization.\n            Default: dict(type='LN').\n        init_cfg (dict | list | None, optional): The init config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 feedforward_channels,\n                 depth,\n                 window_size=7,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.,\n                 downsample=None,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None,\n                 with_cp=True):\n        super().__init__()\n\n        self.init_cfg = init_cfg\n\n        drop_path_rate = drop_path_rate if isinstance(\n            drop_path_rate,\n            list) else [deepcopy(drop_path_rate) for _ in range(depth)]\n\n        self.blocks = ModuleList()\n        for i in range(depth):\n            block = SwinBlock(\n                embed_dims=embed_dims,\n                num_heads=num_heads,\n                feedforward_channels=feedforward_channels,\n                window_size=window_size,\n                shift=False if i % 2 == 0 else True,\n                qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop_rate=drop_rate,\n                attn_drop_rate=attn_drop_rate,\n                drop_path_rate=drop_path_rate[i],\n                act_cfg=act_cfg,\n                norm_cfg=norm_cfg,\n                init_cfg=None)\n            self.blocks.append(block)\n\n        self.downsample = downsample\n        self.with_cp = with_cp\n\n    def forward(self, x, hw_shape):\n        for block in self.blocks:\n            if self.with_cp:\n                x = checkpoint.checkpoint(block, x, hw_shape)\n            else:\n                x = block(x, hw_shape)\n\n        if self.downsample:\n            x_down, down_hw_shape = self.downsample(x, hw_shape)\n            return x_down, down_hw_shape, x, hw_shape\n        else:\n            return x, hw_shape, x, hw_shape\n\n\n@BACKBONES.register_module()\nclass SwinTransformer(BaseModule):\n    \"\"\" Swin Transformer\n    A PyTorch implementation of: `Swin Transformer:\n    Hierarchical Vision Transformer using Shifted Windows`  -\n        https://arxiv.org/abs/2103.14030\n\n    Inspiration from\n    https://github.com/microsoft/Swin-Transformer\n\n    Args:\n        pretrain_img_size (int | tuple[int]): The size of input image when\n            pretrain. Defaults: 224.\n        in_channels (int): The num of input channels.\n            Defaults: 3.\n        embed_dims (int): The feature dimension. Default: 96.\n        patch_size (int | tuple[int]): Patch size. Default: 4.\n        window_size (int): Window size. Default: 7.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n            Default: 4.\n        depths (tuple[int]): Depths of each Swin Transformer stage.\n            Default: (2, 2, 6, 2).\n        num_heads (tuple[int]): Parallel attention heads of each Swin\n            Transformer stage. Default: (3, 6, 12, 24).\n        strides (tuple[int]): The patch merging or patch embedding stride of\n            each Swin Transformer stage. (In swin, we set kernel size equal to\n            stride.) 
Default: (4, 2, 2, 2).\n        out_indices (tuple[int]): Output from which stages.\n            Default: (0, 1, 2, 3).\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key,\n            value. Default: True\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        patch_norm (bool): Whether to add a norm layer for patch embed and\n            patch merging. Default: True.\n        drop_rate (float): Dropout rate. Defaults: 0.\n        attn_drop_rate (float): Attention dropout rate. Default: 0.\n        drop_path_rate (float): Stochastic depth rate. Defaults: 0.1.\n        use_abs_pos_embed (bool): If True, add absolute position embedding to\n            the patch embedding. Defaults: False.\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='GELU').\n        norm_cfg (dict): Config dict for normalization layer at\n            output of backbone. Defaults: dict(type='LN').\n        pretrain_style (str): Choose to use official or mmcls pretrain weights.\n            Default: official.\n        pretrained (str, optional): model pretrained path. Default: None.\n        init_cfg (dict, optional): The Config for initialization.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 pretrain_img_size=224,\n                 in_channels=3,\n                 embed_dims=96,\n                 patch_size=4,\n                 window_size=7,\n                 mlp_ratio=4,\n                 depths=(2, 2, 6, 2),\n                 num_heads=(3, 6, 12, 24),\n                 strides=(4, 2, 2, 2),\n                 out_indices=(0, 1, 2, 3),\n                 qkv_bias=True,\n                 qk_scale=None,\n                 patch_norm=True,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.1,\n                 use_abs_pos_embed=False,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 pretrain_style='official',\n                 pretrained=None,\n                 init_cfg=None,\n                 with_cp=True,\n                 return_stereo_feat=False,\n                 output_missing_index_as_none=False,\n                 frozen_stages=-1):\n        super(SwinTransformer, self).__init__()\n\n        if isinstance(pretrain_img_size, int):\n            pretrain_img_size = to_2tuple(pretrain_img_size)\n        elif isinstance(pretrain_img_size, tuple):\n            if len(pretrain_img_size) == 1:\n                pretrain_img_size = to_2tuple(pretrain_img_size[0])\n            assert len(pretrain_img_size) == 2, \\\n                f'The size of image should have length 1 or 2, ' \\\n                f'but got {len(pretrain_img_size)}'\n\n        assert pretrain_style in ['official', 'mmcls'], \\\n            'We only support loading official ckpt and mmcls ckpt.'\n\n        if isinstance(pretrained, str) or pretrained is None:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        num_layers = len(depths)\n        self.out_indices = out_indices\n        self.use_abs_pos_embed = use_abs_pos_embed\n        self.pretrain_style = pretrain_style\n        self.pretrained = pretrained\n        self.init_cfg = init_cfg\n\n        self.frozen_stages = frozen_stages\n\n        assert \\\n            
strides[0] == patch_size, 'Use non-overlapping patch embed.'\n\n        self.patch_embed = PatchEmbed(\n            in_channels=in_channels,\n            embed_dims=embed_dims,\n            conv_type='Conv2d',\n            kernel_size=patch_size,\n            stride=strides[0],\n            pad_to_patch_size=True,\n            norm_cfg=norm_cfg if patch_norm else None,\n            init_cfg=None)\n\n        if self.use_abs_pos_embed:\n            patch_row = pretrain_img_size[0] // patch_size\n            patch_col = pretrain_img_size[1] // patch_size\n            num_patches = patch_row * patch_col\n            self.absolute_pos_embed = nn.Parameter(\n                torch.zeros((1, num_patches, embed_dims)))\n\n        self.drop_after_pos = nn.Dropout(p=drop_rate)\n\n        # stochastic depth\n        total_depth = sum(depths)\n        dpr = [\n            x.item() for x in torch.linspace(0, drop_path_rate, total_depth)\n        ]  # stochastic depth decay rule\n\n        self.stages = ModuleList()\n        in_channels = embed_dims\n        for i in range(num_layers):\n            if i < num_layers - 1:\n                downsample = PatchMerging(\n                    in_channels=in_channels,\n                    out_channels=2 * in_channels,\n                    stride=strides[i + 1],\n                    norm_cfg=norm_cfg if patch_norm else None,\n                    init_cfg=None)\n            else:\n                downsample = None\n\n            stage = SwinBlockSequence(\n                embed_dims=in_channels,\n                num_heads=num_heads[i],\n                feedforward_channels=mlp_ratio * in_channels,\n                depth=depths[i],\n                window_size=window_size,\n                qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop_rate=drop_rate,\n                attn_drop_rate=attn_drop_rate,\n                drop_path_rate=dpr[:depths[i]],\n                downsample=downsample,\n                act_cfg=act_cfg,\n                norm_cfg=norm_cfg,\n                init_cfg=None,\n                with_cp=with_cp)\n            self.stages.append(stage)\n\n            dpr = dpr[depths[i]:]\n            if downsample:\n                in_channels = downsample.out_channels\n\n        self.num_features = [int(embed_dims * 2**i) for i in range(num_layers)]\n        # Add a norm layer for each output\n        for i in out_indices:\n            layer = build_norm_layer(norm_cfg, self.num_features[i])[1]\n            layer_name = f'norm{i}'\n            self.add_module(layer_name, layer)\n        self.output_missing_index_as_none = output_missing_index_as_none\n\n        self._freeze_stages()\n        self.return_stereo_feat = return_stereo_feat\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            self.patch_embed.eval()\n            for param in self.patch_embed.parameters():\n                param.requires_grad = False\n\n        if self.frozen_stages >= 1 and self.use_abs_pos_embed:\n            self.absolute_pos_embed.requires_grad = False\n\n        if self.frozen_stages >= 2:\n            self.drop_after_pos.eval()\n            for i in range(0, self.frozen_stages - 1):\n                m = self.stages[i]\n                m.eval()\n                for param in m.parameters():\n                    param.requires_grad = False\n\n    def init_weights(self):\n        if self.pretrained is None:\n            super().init_weights()\n            if self.use_abs_pos_embed:\n                
trunc_normal_init(self.absolute_pos_embed, std=0.02)\n            for m in self.modules():\n                if isinstance(m, Linear):\n                    trunc_normal_init(m.weight, std=.02)\n                    if m.bias is not None:\n                        constant_init(m.bias, 0)\n                elif isinstance(m, LayerNorm):\n                    constant_init(m.bias, 0)\n                    constant_init(m.weight, 1.0)\n        elif isinstance(self.pretrained, str):\n            logger = get_root_logger()\n            ckpt = _load_checkpoint(\n                self.pretrained, logger=logger, map_location='cpu')\n            if 'state_dict' in ckpt:\n                state_dict = ckpt['state_dict']\n            elif 'model' in ckpt:\n                state_dict = ckpt['model']\n            else:\n                state_dict = ckpt\n\n            if self.pretrain_style == 'official':\n                state_dict = swin_convert(state_dict)\n\n            # strip prefix of state_dict\n            if list(state_dict.keys())[0].startswith('module.'):\n                state_dict = {k[7:]: v for k, v in state_dict.items()}\n            # if list(state_dict.keys())[0].startswith('backbone.'):\n            #     state_dict = {k[9:]: v for k, v in state_dict.items()}\n            # reshape absolute position embedding\n            if state_dict.get('absolute_pos_embed') is not None:\n                absolute_pos_embed = state_dict['absolute_pos_embed']\n                N1, L, C1 = absolute_pos_embed.size()\n                N2, C2, H, W = self.absolute_pos_embed.size()\n                if N1 != N2 or C1 != C2 or L != H * W:\n                    logger.warning('Error in loading absolute_pos_embed, pass')\n                else:\n                    state_dict['absolute_pos_embed'] = absolute_pos_embed.view(\n                        N2, H, W, C2).permute(0, 3, 1, 2).contiguous()\n\n            # interpolate position bias table if needed\n            relative_position_bias_table_keys = [\n                k for k in state_dict.keys()\n                if 'relative_position_bias_table' in k\n            ]\n            for table_key in relative_position_bias_table_keys:\n                table_pretrained = state_dict[table_key]\n                table_current = self.state_dict()[table_key]\n                L1, nH1 = table_pretrained.size()\n                L2, nH2 = table_current.size()\n                if nH1 != nH2:\n                    logger.warning(f'Error in loading {table_key}, pass')\n                else:\n                    if L1 != L2:\n                        S1 = int(L1**0.5)\n                        S2 = int(L2**0.5)\n                        table_pretrained_resized = resize(\n                            table_pretrained.permute(1, 0).reshape(\n                                1, nH1, S1, S1),\n                            size=(S2, S2),\n                            mode='bicubic')\n                        state_dict[table_key] = table_pretrained_resized.view(\n                            nH2, L2).permute(1, 0).contiguous()\n\n            # load state_dict\n            self.load_state_dict(state_dict, False)\n\n    def forward(self, x):\n        x = self.patch_embed(x)\n\n        hw_shape = (self.patch_embed.DH, self.patch_embed.DW)\n        if self.use_abs_pos_embed:\n            x = x + self.absolute_pos_embed\n        x = self.drop_after_pos(x)\n\n        outs = []\n        for i, stage in enumerate(self.stages):\n            x, hw_shape, out, out_hw_shape = stage(x, hw_shape)\n            if i 
== 0 and self.return_stereo_feat:\n                out = out.view(-1, *out_hw_shape,\n                               self.num_features[i]).permute(0, 3, 1,\n                                                             2).contiguous()\n                outs.append(out)\n            if i in self.out_indices:\n                norm_layer = getattr(self, f'norm{i}')\n                out = norm_layer(out)\n                out = out.view(-1, *out_hw_shape,\n                               self.num_features[i]).permute(0, 3, 1,\n                                                             2).contiguous()\n                outs.append(out)\n            elif self.output_missing_index_as_none:\n                outs.append(None)\n        return outs\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keeping the\n        normalization layers frozen.\"\"\"\n        super(SwinTransformer, self).train(mode)\n        self._freeze_stages()"
  },
  {
    "path": "mmdet3d/models/backbones/vovnet.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2022 megvii-model. All Rights Reserved.\n# ------------------------------------------------------------------------\n# Modified from DETR3D (https://github.com/WangYueFt/detr3d)\n# Copyright (c) 2021 Wang, Yue\n# ------------------------------------------------------------------------\n# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved.\n# Copyright 2021 Toyota Research Institute.  All rights reserved.\n# ------------------------------------------------------------------------\nfrom collections import OrderedDict\nfrom mmcv.runner import BaseModule\nfrom mmdet.models.builder import BACKBONES\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn.modules.batchnorm import _BatchNorm\nimport warnings\nimport torch.utils.checkpoint as cp\n\nVoVNet19_slim_dw_eSE = {\n    'stem': [64, 64, 64],\n    'stage_conv_ch': [64, 80, 96, 112],\n    'stage_out_ch': [112, 256, 384, 512],\n    \"layer_per_block\": 3,\n    \"block_per_stage\": [1, 1, 1, 1],\n    \"eSE\": True,\n    \"dw\": True\n}\n\nVoVNet19_dw_eSE = {\n    'stem': [64, 64, 64],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 3,\n    \"block_per_stage\": [1, 1, 1, 1],\n    \"eSE\": True,\n    \"dw\": True\n}\n\nVoVNet19_slim_eSE = {\n    'stem': [64, 64, 128],\n    'stage_conv_ch': [64, 80, 96, 112],\n    'stage_out_ch': [112, 256, 384, 512],\n    'layer_per_block': 3,\n    'block_per_stage': [1, 1, 1, 1],\n    'eSE': True,\n    \"dw\": False\n}\n\nVoVNet19_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 3,\n    \"block_per_stage\": [1, 1, 1, 1],\n    \"eSE\": True,\n    \"dw\": False\n}\n\nVoVNet39_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 5,\n    \"block_per_stage\": [1, 1, 2, 2],\n    \"eSE\": True,\n    \"dw\": False\n}\n\nVoVNet57_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 5,\n    \"block_per_stage\": [1, 1, 4, 3],\n    \"eSE\": True,\n    \"dw\": False\n}\n\nVoVNet99_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 5,\n    \"block_per_stage\": [1, 3, 9, 3],\n    \"eSE\": True,\n    \"dw\": False\n}\n\n_STAGE_SPECS = {\n    \"V-19-slim-dw-eSE\": VoVNet19_slim_dw_eSE,\n    \"V-19-dw-eSE\": VoVNet19_dw_eSE,\n    \"V-19-slim-eSE\": VoVNet19_slim_eSE,\n    \"V-19-eSE\": VoVNet19_eSE,\n    \"V-39-eSE\": VoVNet39_eSE,\n    \"V-57-eSE\": VoVNet57_eSE,\n    \"V-99-eSE\": VoVNet99_eSE,\n}\n\n\ndef dw_conv3x3(in_channels, out_channels, module_name, postfix, stride=1, kernel_size=3, padding=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return [\n        (\n            '{}_{}/dw_conv3x3'.format(module_name, postfix),\n            nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size=kernel_size,\n                stride=stride,\n                padding=padding,\n                groups=out_channels,\n                bias=False\n            )\n        ),\n        (\n            '{}_{}/pw_conv1x1'.format(module_name, postfix),\n            nn.Conv2d(in_channels, 
out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=False)\n        ),\n        ('{}_{}/pw_norm'.format(module_name, postfix), nn.BatchNorm2d(out_channels)),\n        ('{}_{}/pw_relu'.format(module_name, postfix), nn.ReLU(inplace=True)),\n    ]\n\n\ndef conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=3, padding=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return [\n        (\n            f\"{module_name}_{postfix}/conv\",\n            nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size=kernel_size,\n                stride=stride,\n                padding=padding,\n                groups=groups,\n                bias=False,\n            ),\n        ),\n        (f\"{module_name}_{postfix}/norm\", nn.BatchNorm2d(out_channels)),\n        (f\"{module_name}_{postfix}/relu\", nn.ReLU(inplace=True)),\n    ]\n\n\ndef conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=1, padding=0):\n    \"\"\"1x1 convolution with padding\"\"\"\n    return [\n        (\n            f\"{module_name}_{postfix}/conv\",\n            nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size=kernel_size,\n                stride=stride,\n                padding=padding,\n                groups=groups,\n                bias=False,\n            ),\n        ),\n        (f\"{module_name}_{postfix}/norm\", nn.BatchNorm2d(out_channels)),\n        (f\"{module_name}_{postfix}/relu\", nn.ReLU(inplace=True)),\n    ]\n\n\nclass Hsigmoid(nn.Module):\n    def __init__(self, inplace=True):\n        super(Hsigmoid, self).__init__()\n        self.inplace = inplace\n\n    def forward(self, x):\n        return F.relu6(x + 3.0, inplace=self.inplace) / 6.0\n\n\nclass eSEModule(nn.Module):\n    def __init__(self, channel, reduction=4):\n        super(eSEModule, self).__init__()\n        self.avg_pool = nn.AdaptiveAvgPool2d(1)\n        self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0)\n        self.hsigmoid = Hsigmoid()\n\n    def forward(self, x):\n        input = x\n        x = self.avg_pool(x)\n        x = self.fc(x)\n        x = self.hsigmoid(x)\n        return input * x\n\n\nclass _OSA_module(nn.Module):\n    def __init__(\n        self, in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE=False, identity=False, depthwise=False, with_cp=True\n    ):\n\n        super(_OSA_module, self).__init__()\n\n        self.identity = identity\n        self.depthwise = depthwise\n        self.isReduced = False\n        self.use_checkpoint = with_cp\n        self.layers = nn.ModuleList()\n        in_channel = in_ch\n        if self.depthwise and in_channel != stage_ch:\n            self.isReduced = True\n            self.conv_reduction = nn.Sequential(\n                OrderedDict(conv1x1(in_channel, stage_ch, \"{}_reduction\".format(module_name), \"0\"))\n            )\n        for i in range(layer_per_block):\n            if self.depthwise:\n                self.layers.append(nn.Sequential(OrderedDict(dw_conv3x3(stage_ch, stage_ch, module_name, i))))\n            else:\n                self.layers.append(nn.Sequential(OrderedDict(conv3x3(in_channel, stage_ch, module_name, i))))\n            in_channel = stage_ch\n\n        # feature aggregation\n        in_channel = in_ch + layer_per_block * stage_ch\n        self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, \"concat\")))\n\n        self.ese = 
eSEModule(concat_ch)\n\n    def _forward(self, x):\n\n        identity_feat = x\n\n        output = []\n        output.append(x)\n        if self.depthwise and self.isReduced:\n            x = self.conv_reduction(x)\n        for layer in self.layers:\n            x = layer(x)\n            output.append(x)\n\n        x = torch.cat(output, dim=1)\n        xt = self.concat(x)\n\n        xt = self.ese(xt)\n\n        if self.identity:\n            xt = xt + identity_feat\n\n        return xt\n\n    def forward(self, x):\n\n        if self.use_checkpoint and self.training:\n            xt = cp.checkpoint(self._forward, x)\n        else:\n            xt = self._forward(x)\n\n        return xt\n\n\nclass _OSA_stage(nn.Sequential):\n    def __init__(\n        self, in_ch, stage_ch, concat_ch, block_per_stage, layer_per_block, stage_num, SE=False, depthwise=False\n    ):\n\n        super(_OSA_stage, self).__init__()\n\n        if not stage_num == 2:\n            self.add_module(\"Pooling\", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))\n\n        if block_per_stage != 1:\n            SE = False\n        module_name = f\"OSA{stage_num}_1\"\n        self.add_module(\n            module_name, _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE, depthwise=depthwise)\n        )\n        for i in range(block_per_stage - 1):\n            if i != block_per_stage - 2:  # last block\n                SE = False\n            module_name = f\"OSA{stage_num}_{i + 2}\"\n            self.add_module(\n                module_name,\n                _OSA_module(\n                    concat_ch,\n                    stage_ch,\n                    concat_ch,\n                    layer_per_block,\n                    module_name,\n                    SE,\n                    identity=True,\n                    depthwise=depthwise\n                ),\n            )\n\n\n@BACKBONES.register_module()\nclass VoVNetCP(BaseModule):\n    def __init__(self, spec_name, input_ch=3, out_features=None, \n                 frozen_stages=-1, norm_eval=True, pretrained=None, init_cfg=None):\n        \"\"\"\n        Args:\n            input_ch(int) : the number of input channel\n            out_features (list[str]): name of the layers whose outputs should\n                be returned in forward. 
Can be anything in \"stem\", \"stage2\" ...\n        \"\"\"\n        super(VoVNetCP, self).__init__(init_cfg)\n        self.frozen_stages = frozen_stages\n        self.norm_eval = norm_eval\n\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        stage_specs = _STAGE_SPECS[spec_name]\n\n        stem_ch = stage_specs[\"stem\"]\n        config_stage_ch = stage_specs[\"stage_conv_ch\"]\n        config_concat_ch = stage_specs[\"stage_out_ch\"]\n        block_per_stage = stage_specs[\"block_per_stage\"]\n        layer_per_block = stage_specs[\"layer_per_block\"]\n        SE = stage_specs[\"eSE\"]\n        depthwise = stage_specs[\"dw\"]\n\n        self._out_features = out_features\n\n        # Stem module\n        conv_type = dw_conv3x3 if depthwise else conv3x3\n        stem = conv3x3(input_ch, stem_ch[0], \"stem\", \"1\", 2)\n        stem += conv_type(stem_ch[0], stem_ch[1], \"stem\", \"2\", 1)\n        stem += conv_type(stem_ch[1], stem_ch[2], \"stem\", \"3\", 2)\n        self.add_module(\"stem\", nn.Sequential((OrderedDict(stem))))\n        current_stirde = 4\n        self._out_feature_strides = {\"stem\": current_stirde, \"stage2\": current_stirde}\n        self._out_feature_channels = {\"stem\": stem_ch[2]}\n\n        stem_out_ch = [stem_ch[2]]\n        in_ch_list = stem_out_ch + config_concat_ch[:-1]\n        # OSA stages\n        self.stage_names = []\n        for i in range(4):  # num_stages\n            name = \"stage%d\" % (i + 2)  # stage 2 ... stage 5\n            self.stage_names.append(name)\n            self.add_module(\n                name,\n                _OSA_stage(\n                    in_ch_list[i],\n                    config_stage_ch[i],\n                    config_concat_ch[i],\n                    block_per_stage[i],\n                    layer_per_block,\n                    i + 2,\n                    SE,\n                    depthwise,\n                ),\n            )\n\n            self._out_feature_channels[name] = config_concat_ch[i]\n            if not i == 0:\n                self._out_feature_strides[name] = current_stirde = int(current_stirde * 2)\n\n        # initialize weights\n        # self._initialize_weights()\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight)\n\n    # def forward(self, x):\n    #     outputs = {}\n    #     x = self.stem(x)\n    #     if \"stem\" in self._out_features:\n    #         outputs[\"stem\"] = x\n    #     for name in self.stage_names:\n    #         x = getattr(self, name)(x)\n    #         if name in self._out_features:\n    #             outputs[name] = x\n\n    #     return outputs\n\n    def forward(self, x):\n        outputs = []\n        x = self.stem(x)\n        if \"stem\" in self._out_features:\n            outputs.append(x)\n        for name in self.stage_names:\n            x = getattr(self, name)(x)\n            if name in self._out_features:\n                outputs.append(x)\n\n        return outputs\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            m = getattr(self, 'stem')\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n        for i in range(1, self.frozen_stages + 1):\n            m = 
getattr(self, f'stage{i+1}')\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keeping the\n        normalization layers frozen.\"\"\"\n        super(VoVNetCP, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                # trick: eval has an effect on BatchNorm layers only\n                if isinstance(m, _BatchNorm):\n                    m.eval()"
  },
  {
    "path": "mmdet3d/models/backbones/vovnet2.py",
    "content": "from collections import OrderedDict\nfrom mmcv.runner import BaseModule\nfrom mmdet.models.builder import BACKBONES\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn.modules.batchnorm import _BatchNorm\nimport warnings\nimport torch.utils.checkpoint as cp\n\nVoVNet19_slim_dw_eSE = {\n    'stem': [64, 64, 64],\n    'stage_conv_ch': [64, 80, 96, 112],\n    'stage_out_ch': [112, 256, 384, 512],\n    \"layer_per_block\": 3,\n    \"block_per_stage\": [1, 1, 1, 1],\n    \"eSE\": True,\n    \"dw\": True\n}\n\nVoVNet19_dw_eSE = {\n    'stem': [64, 64, 64],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 3,\n    \"block_per_stage\": [1, 1, 1, 1],\n    \"eSE\": True,\n    \"dw\": True\n}\n\nVoVNet19_slim_eSE = {\n    'stem': [64, 64, 128],\n    'stage_conv_ch': [64, 80, 96, 112],\n    'stage_out_ch': [112, 256, 384, 512],\n    'layer_per_block': 3,\n    'block_per_stage': [1, 1, 1, 1],\n    'eSE': True,\n    \"dw\": False\n}\n\nVoVNet19_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 3,\n    \"block_per_stage\": [1, 1, 1, 1],\n    \"eSE\": True,\n    \"dw\": False\n}\n\nVoVNet39_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 5,\n    \"block_per_stage\": [1, 1, 2, 2],\n    \"eSE\": True,\n    \"dw\": False\n}\n\nVoVNet57_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 5,\n    \"block_per_stage\": [1, 1, 4, 3],\n    \"eSE\": True,\n    \"dw\": False\n}\n\nVoVNet99_eSE = {\n    'stem': [64, 64, 128],\n    \"stage_conv_ch\": [128, 160, 192, 224],\n    \"stage_out_ch\": [256, 512, 768, 1024],\n    \"layer_per_block\": 5,\n    \"block_per_stage\": [1, 3, 9, 3],\n    \"eSE\": True,\n    \"dw\": False\n}\n\n_STAGE_SPECS = {\n    \"V-19-slim-dw-eSE\": VoVNet19_slim_dw_eSE,\n    \"V-19-dw-eSE\": VoVNet19_dw_eSE,\n    \"V-19-slim-eSE\": VoVNet19_slim_eSE,\n    \"V-19-eSE\": VoVNet19_eSE,\n    \"V-39-eSE\": VoVNet39_eSE,\n    \"V-57-eSE\": VoVNet57_eSE,\n    \"V-99-eSE\": VoVNet99_eSE,\n}\n\n\ndef dw_conv3x3(in_channels, out_channels, module_name, postfix, stride=1, kernel_size=3, padding=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return [\n        (\n            '{}_{}/dw_conv3x3'.format(module_name, postfix),\n            nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size=kernel_size,\n                stride=stride,\n                padding=padding,\n                groups=out_channels,\n                bias=False\n            )\n        ),\n        (\n            '{}_{}/pw_conv1x1'.format(module_name, postfix),\n            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=False)\n        ),\n        ('{}_{}/pw_norm'.format(module_name, postfix), nn.BatchNorm2d(out_channels)),\n        ('{}_{}/pw_relu'.format(module_name, postfix), nn.ReLU(inplace=True)),\n    ]\n\n\ndef conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=3, padding=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return [\n        (\n            f\"{module_name}_{postfix}/conv\",\n            nn.Conv2d(\n                in_channels,\n                out_channels,\n     
           kernel_size=kernel_size,\n                stride=stride,\n                padding=padding,\n                groups=groups,\n                bias=False,\n            ),\n        ),\n        (f\"{module_name}_{postfix}/norm\", nn.BatchNorm2d(out_channels)),\n        (f\"{module_name}_{postfix}/relu\", nn.ReLU(inplace=True)),\n    ]\n\n\ndef conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=1, padding=0):\n    \"\"\"1x1 convolution with padding\"\"\"\n    return [\n        (\n            f\"{module_name}_{postfix}/conv\",\n            nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size=kernel_size,\n                stride=stride,\n                padding=padding,\n                groups=groups,\n                bias=False,\n            ),\n        ),\n        (f\"{module_name}_{postfix}/norm\", nn.BatchNorm2d(out_channels)),\n        (f\"{module_name}_{postfix}/relu\", nn.ReLU(inplace=True)),\n    ]\n\n\nclass Hsigmoid(nn.Module):\n    def __init__(self, inplace=True):\n        super(Hsigmoid, self).__init__()\n        self.inplace = inplace\n\n    def forward(self, x):\n        return F.relu6(x + 3.0, inplace=self.inplace) / 6.0\n\n\nclass eSEModule(nn.Module):\n    def __init__(self, channel, reduction=4):\n        super(eSEModule, self).__init__()\n        self.avg_pool = nn.AdaptiveAvgPool2d(1)\n        self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0)\n        self.hsigmoid = Hsigmoid()\n\n    def forward(self, x):\n        input = x\n        x = self.avg_pool(x)\n        x = self.fc(x)\n        x = self.hsigmoid(x)\n        return input * x\n\n\nclass _OSA_module(nn.Module):\n    def __init__(\n        self, in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE=False, identity=False, depthwise=False, with_cp=True\n    ):\n\n        super(_OSA_module, self).__init__()\n\n        self.identity = identity\n        self.depthwise = depthwise\n        self.isReduced = False\n        self.use_checkpoint = with_cp\n        self.layers = nn.ModuleList()\n        in_channel = in_ch\n        if self.depthwise and in_channel != stage_ch:\n            self.isReduced = True\n            self.conv_reduction = nn.Sequential(\n                OrderedDict(conv1x1(in_channel, stage_ch, \"{}_reduction\".format(module_name), \"0\"))\n            )\n        for i in range(layer_per_block):\n            if self.depthwise:\n                self.layers.append(nn.Sequential(OrderedDict(dw_conv3x3(stage_ch, stage_ch, module_name, i))))\n            else:\n                self.layers.append(nn.Sequential(OrderedDict(conv3x3(in_channel, stage_ch, module_name, i))))\n            in_channel = stage_ch\n\n        # feature aggregation\n        in_channel = in_ch + layer_per_block * stage_ch\n        self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, \"concat\")))\n\n        self.ese = eSEModule(concat_ch)\n\n    def _forward(self, x):\n\n        identity_feat = x\n\n        output = []\n        output.append(x)\n        if self.depthwise and self.isReduced:\n            x = self.conv_reduction(x)\n        for layer in self.layers:\n            x = layer(x)\n            output.append(x)\n\n        x = torch.cat(output, dim=1)\n        xt = self.concat(x)\n\n        xt = self.ese(xt)\n\n        if self.identity:\n            xt = xt + identity_feat\n\n        return xt\n\n    def forward(self, x):\n\n        if self.use_checkpoint and self.training:\n            xt = 
cp.checkpoint(self._forward, x)\n        else:\n            xt = self._forward(x)\n\n        return xt\n\n\nclass _OSA_stage(nn.Sequential):\n    def __init__(\n        self, in_ch, stage_ch, concat_ch, block_per_stage, layer_per_block, stage_num, SE=False, depthwise=False\n    ):\n\n        super(_OSA_stage, self).__init__()\n\n        if not stage_num == 2:\n            self.add_module(\"Pooling\", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))\n\n        if block_per_stage != 1:\n            SE = False\n        module_name = f\"OSA{stage_num}_1\"\n        self.add_module(\n            module_name, _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE, depthwise=depthwise)\n        )\n        for i in range(block_per_stage - 1):\n            if i != block_per_stage - 2:  # last block\n                SE = False\n            module_name = f\"OSA{stage_num}_{i + 2}\"\n            self.add_module(\n                module_name,\n                _OSA_module(\n                    concat_ch,\n                    stage_ch,\n                    concat_ch,\n                    layer_per_block,\n                    module_name,\n                    SE,\n                    identity=True,\n                    depthwise=depthwise\n                ),\n            )\n\n\n@BACKBONES.register_module()\nclass VoVNet2(BaseModule):\n    def __init__(self, spec_name, input_ch=3, out_features=None, \n                 frozen_stages=-1, norm_eval=True, pretrained=None, init_cfg=None):\n        \"\"\"\n        Args:\n            input_ch(int) : the number of input channel\n            out_features (list[str]): name of the layers whose outputs should\n                be returned in forward. Can be anything in \"stem\", \"stage2\" ...\n        \"\"\"\n        super(VoVNet2, self).__init__(init_cfg)\n        self.frozen_stages = frozen_stages\n        self.norm_eval = norm_eval\n\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        stage_specs = _STAGE_SPECS[spec_name]\n\n        stem_ch = stage_specs[\"stem\"]\n        config_stage_ch = stage_specs[\"stage_conv_ch\"]\n        config_concat_ch = stage_specs[\"stage_out_ch\"]\n        block_per_stage = stage_specs[\"block_per_stage\"]\n        layer_per_block = stage_specs[\"layer_per_block\"]\n        SE = stage_specs[\"eSE\"]\n        depthwise = stage_specs[\"dw\"]\n\n        self._out_features = out_features\n\n        # Stem module\n        conv_type = dw_conv3x3 if depthwise else conv3x3\n        stem = conv3x3(input_ch, stem_ch[0], \"stem\", \"1\", 2)\n        stem += conv_type(stem_ch[0], stem_ch[1], \"stem\", \"2\", 1)\n        stem += conv_type(stem_ch[1], stem_ch[2], \"stem\", \"3\", 2)\n        self.add_module(\"stem\", nn.Sequential((OrderedDict(stem))))\n        current_stirde = 4\n        self._out_feature_strides = {\"stem\": current_stirde, \"stage2\": current_stirde}\n        self._out_feature_channels = {\"stem\": stem_ch[2]}\n\n        stem_out_ch = [stem_ch[2]]\n        in_ch_list = stem_out_ch + config_concat_ch[:-1]\n        # OSA stages\n        self.stage_names = []\n        for i in range(4):  # num_stages\n            name = \"stage%d\" % (i + 2)  # stage 2 ... 
stage 5\n            self.stage_names.append(name)\n            self.add_module(\n                name,\n                _OSA_stage(\n                    in_ch_list[i],\n                    config_stage_ch[i],\n                    config_concat_ch[i],\n                    block_per_stage[i],\n                    layer_per_block,\n                    i + 2,\n                    SE,\n                    depthwise,\n                ),\n            )\n\n            self._out_feature_channels[name] = config_concat_ch[i]\n            if not i == 0:\n                self._out_feature_strides[name] = current_stirde = int(current_stirde * 2)\n\n        # initialize weights\n        # self._initialize_weights()\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight)\n\n    def forward(self, x):\n        outputs = {}\n        x = self.stem(x)\n        if \"stem\" in self._out_features:\n            outputs[\"stem\"] = x\n        for name in self.stage_names:\n            x = getattr(self, name)(x)\n            if name in self._out_features:\n                outputs[name] = x\n\n        return outputs\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            m = getattr(self, 'stem')\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n        for i in range(1, self.frozen_stages + 1):\n            m = getattr(self, f'stage{i+1}')\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keeping the\n        normalization layers frozen.\"\"\"\n        super(VoVNet2, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                # trick: eval has an effect on BatchNorm layers only\n                if isinstance(m, _BatchNorm):\n                    m.eval()"
  },
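A minimal sketch of how the VoVNet2 backbone registered above might be configured from a model config. The spec_name value here is a placeholder: real keys come from _STAGE_SPECS, which is defined earlier in this file and not shown in this excerpt.

    # Illustrative config only; 'V-99-eSE' is a hypothetical _STAGE_SPECS key.
    backbone = dict(
        type='VoVNet2',
        spec_name='V-99-eSE',
        input_ch=3,
        out_features=['stage4', 'stage5'],  # keys returned by forward()
        frozen_stages=1,                    # freezes the stem and stage2
        norm_eval=True,                     # keep BatchNorm layers in eval mode
    )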
  {
    "path": "mmdet3d/models/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom mmcv.cnn import MODELS as MMCV_MODELS\nfrom mmcv.utils import Registry\n\nfrom mmdet.models.builder import BACKBONES as MMDET_BACKBONES\nfrom mmdet.models.builder import DETECTORS as MMDET_DETECTORS\nfrom mmdet.models.builder import HEADS as MMDET_HEADS\nfrom mmdet.models.builder import LOSSES as MMDET_LOSSES\nfrom mmdet.models.builder import NECKS as MMDET_NECKS\nfrom mmdet.models.builder import ROI_EXTRACTORS as MMDET_ROI_EXTRACTORS\nfrom mmdet.models.builder import SHARED_HEADS as MMDET_SHARED_HEADS\nfrom mmseg.models.builder import LOSSES as MMSEG_LOSSES\n\nMODELS = Registry('models', parent=MMCV_MODELS)\n\nBACKBONES = MODELS\nNECKS = MODELS\nROI_EXTRACTORS = MODELS\nSHARED_HEADS = MODELS\nHEADS = MODELS\nLOSSES = MODELS\nDETECTORS = MODELS\nVOXEL_ENCODERS = MODELS\nMIDDLE_ENCODERS = MODELS\nFUSION_LAYERS = MODELS\nSEGMENTORS = MODELS\n\n\ndef build_backbone(cfg):\n    \"\"\"Build backbone.\"\"\"\n    if cfg['type'] in BACKBONES._module_dict.keys():\n        return BACKBONES.build(cfg)\n    else:\n        return MMDET_BACKBONES.build(cfg)\n\n\ndef build_neck(cfg):\n    \"\"\"Build neck.\"\"\"\n    if cfg['type'] in NECKS._module_dict.keys():\n        return NECKS.build(cfg)\n    else:\n        return MMDET_NECKS.build(cfg)\n\n\ndef build_roi_extractor(cfg):\n    \"\"\"Build RoI feature extractor.\"\"\"\n    if cfg['type'] in ROI_EXTRACTORS._module_dict.keys():\n        return ROI_EXTRACTORS.build(cfg)\n    else:\n        return MMDET_ROI_EXTRACTORS.build(cfg)\n\n\ndef build_shared_head(cfg):\n    \"\"\"Build shared head of detector.\"\"\"\n    if cfg['type'] in SHARED_HEADS._module_dict.keys():\n        return SHARED_HEADS.build(cfg)\n    else:\n        return MMDET_SHARED_HEADS.build(cfg)\n\n\ndef build_head(cfg):\n    \"\"\"Build head.\"\"\"\n    if cfg['type'] in HEADS._module_dict.keys():\n        return HEADS.build(cfg)\n    else:\n        return MMDET_HEADS.build(cfg)\n\n\ndef build_loss(cfg):\n    \"\"\"Build loss function.\"\"\"\n    if cfg['type'] in LOSSES._module_dict.keys():\n        return LOSSES.build(cfg)\n    elif cfg['type'] in MMDET_LOSSES._module_dict.keys():\n        return MMDET_LOSSES.build(cfg)\n    else:\n        return MMSEG_LOSSES.build(cfg)\n\n\ndef build_detector(cfg, train_cfg=None, test_cfg=None):\n    \"\"\"Build detector.\"\"\"\n    if train_cfg is not None or test_cfg is not None:\n        warnings.warn(\n            'train_cfg and test_cfg is deprecated, '\n            'please specify them in model', UserWarning)\n    assert cfg.get('train_cfg') is None or train_cfg is None, \\\n        'train_cfg specified in both outer field and model field '\n    assert cfg.get('test_cfg') is None or test_cfg is None, \\\n        'test_cfg specified in both outer field and model field '\n    if cfg['type'] in DETECTORS._module_dict.keys():\n        return DETECTORS.build(\n            cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))\n    else:\n        return MMDET_DETECTORS.build(\n            cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))\n\n\ndef build_segmentor(cfg, train_cfg=None, test_cfg=None):\n    \"\"\"Build segmentor.\"\"\"\n    if train_cfg is not None or test_cfg is not None:\n        warnings.warn(\n            'train_cfg and test_cfg is deprecated, '\n            'please specify them in model', UserWarning)\n    assert cfg.get('train_cfg') is None or train_cfg is None, \\\n        'train_cfg specified in both outer field and model 
field '\n    assert cfg.get('test_cfg') is None or test_cfg is None, \\\n        'test_cfg specified in both outer field and model field '\n    return SEGMENTORS.build(\n        cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))\n\n\ndef build_model(cfg, train_cfg=None, test_cfg=None):\n    \"\"\"A function wrapper for building 3D detector or segmentor according to\n    cfg.\n\n    Should be deprecated in the future.\n    \"\"\"\n    if cfg.type in ['EncoderDecoder3D']:\n        return build_segmentor(cfg, train_cfg=train_cfg, test_cfg=test_cfg)\n    else:\n        return build_detector(cfg, train_cfg=train_cfg, test_cfg=test_cfg)\n\n\ndef build_voxel_encoder(cfg):\n    \"\"\"Build voxel encoder.\"\"\"\n    return VOXEL_ENCODERS.build(cfg)\n\n\ndef build_middle_encoder(cfg):\n    \"\"\"Build middle level encoder.\"\"\"\n    return MIDDLE_ENCODERS.build(cfg)\n\n\ndef build_fusion_layer(cfg):\n    \"\"\"Build fusion layer.\"\"\"\n    return FUSION_LAYERS.build(cfg)\n"
  },
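Every build_* helper above follows the same dispatch: look the type up in this repo's shared MODELS registry first, then fall back to the corresponding mmdet/mmseg registry. A minimal usage sketch under that assumption (spec_name is again a hypothetical key; the ResNet arguments are the standard mmdet ones):

    from mmdet3d.models.builder import build_backbone

    # 'VoVNet2' is registered in the local MODELS registry, so it is built there.
    local_backbone = build_backbone(
        dict(type='VoVNet2', spec_name='V-99-eSE', out_features=['stage5']))

    # 'ResNet' is not in the local registry, so the call falls through to
    # MMDET_BACKBONES.build(cfg).
    mmdet_backbone = build_backbone(
        dict(type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3)))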
  {
    "path": "mmdet3d/models/decode_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .dgcnn_head import DGCNNHead\nfrom .paconv_head import PAConvHead\nfrom .pointnet2_head import PointNet2Head\n\n__all__ = ['PointNet2Head', 'DGCNNHead', 'PAConvHead']\n"
  },
  {
    "path": "mmdet3d/models/decode_heads/decode_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.cnn import normal_init\nfrom mmcv.runner import BaseModule, auto_fp16, force_fp32\nfrom torch import nn as nn\n\nfrom mmseg.models.builder import build_loss\n\n\nclass Base3DDecodeHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for BaseDecodeHead.\n\n    Args:\n        channels (int): Channels after modules, before conv_seg.\n        num_classes (int): Number of classes.\n        dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5.\n        conv_cfg (dict, optional): Config of conv layers.\n            Default: dict(type='Conv1d').\n        norm_cfg (dict, optional): Config of norm layers.\n            Default: dict(type='BN1d').\n        act_cfg (dict, optional): Config of activation layers.\n            Default: dict(type='ReLU').\n        loss_decode (dict, optional): Config of decode loss.\n            Default: dict(type='CrossEntropyLoss').\n        ignore_index (int, optional): The label index to be ignored.\n            When using masked BCE loss, ignore_index should be set to None.\n            Default: 255.\n    \"\"\"\n\n    def __init__(self,\n                 channels,\n                 num_classes,\n                 dropout_ratio=0.5,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 loss_decode=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     class_weight=None,\n                     loss_weight=1.0),\n                 ignore_index=255,\n                 init_cfg=None):\n        super(Base3DDecodeHead, self).__init__(init_cfg=init_cfg)\n        self.channels = channels\n        self.num_classes = num_classes\n        self.dropout_ratio = dropout_ratio\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.loss_decode = build_loss(loss_decode)\n        self.ignore_index = ignore_index\n\n        self.conv_seg = nn.Conv1d(channels, num_classes, kernel_size=1)\n        if dropout_ratio > 0:\n            self.dropout = nn.Dropout(dropout_ratio)\n        else:\n            self.dropout = None\n        self.fp16_enabled = False\n\n    def init_weights(self):\n        \"\"\"Initialize weights of classification layer.\"\"\"\n        super().init_weights()\n        normal_init(self.conv_seg, mean=0, std=0.01)\n\n    @auto_fp16()\n    @abstractmethod\n    def forward(self, inputs):\n        \"\"\"Placeholder of forward function.\"\"\"\n        pass\n\n    def forward_train(self, inputs, img_metas, pts_semantic_mask, train_cfg):\n        \"\"\"Forward function for training.\n\n        Args:\n            inputs (list[torch.Tensor]): List of multi-level point features.\n            img_metas (list[dict]): Meta information of each sample.\n            pts_semantic_mask (torch.Tensor): Semantic segmentation masks\n                used if the architecture supports semantic segmentation task.\n            train_cfg (dict): The training config.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        seg_logits = self.forward(inputs)\n        losses = self.losses(seg_logits, pts_semantic_mask)\n        return losses\n\n    def forward_test(self, inputs, img_metas, test_cfg):\n        \"\"\"Forward function for testing.\n\n        Args:\n            inputs (list[Tensor]): 
List of multi-level point features.\n            img_metas (list[dict]): Meta information of each sample.\n            test_cfg (dict): The testing config.\n\n        Returns:\n            Tensor: Output segmentation map.\n        \"\"\"\n        return self.forward(inputs)\n\n    def cls_seg(self, feat):\n        \"\"\"Classify each point.\"\"\"\n        if self.dropout is not None:\n            feat = self.dropout(feat)\n        output = self.conv_seg(feat)\n        return output\n\n    @force_fp32(apply_to=('seg_logit', ))\n    def losses(self, seg_logit, seg_label):\n        \"\"\"Compute semantic segmentation loss.\n\n        Args:\n            seg_logit (torch.Tensor): Predicted per-point segmentation logits\n                of shape [B, num_classes, N].\n            seg_label (torch.Tensor): Ground-truth segmentation label of\n                shape [B, N].\n        \"\"\"\n        loss = dict()\n        loss['loss_sem_seg'] = self.loss_decode(\n            seg_logit, seg_label, ignore_index=self.ignore_index)\n        return loss\n"
  },
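For reference, a plain-PyTorch sketch of what Base3DDecodeHead.losses() computes with the default loss_decode config: per-point cross-entropy over logits of shape [B, num_classes, N] against labels of shape [B, N], honoring ignore_index. This mirrors the computation only; the head itself goes through the mmseg loss wrapper returned by build_loss().

    import torch
    import torch.nn.functional as F

    B, num_classes, N = 2, 20, 4096
    seg_logit = torch.randn(B, num_classes, N)          # predicted per-point logits
    seg_label = torch.randint(0, num_classes, (B, N))   # ground-truth labels
    seg_label[:, :16] = 255                             # e.g. unlabeled points to ignore

    loss_sem_seg = F.cross_entropy(seg_logit, seg_label, ignore_index=255)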
  {
    "path": "mmdet3d/models/decode_heads/dgcnn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn.bricks import ConvModule\n\nfrom mmdet3d.ops import DGCNNFPModule\nfrom ..builder import HEADS\nfrom .decode_head import Base3DDecodeHead\n\n\n@HEADS.register_module()\nclass DGCNNHead(Base3DDecodeHead):\n    r\"\"\"DGCNN decoder head.\n\n    Decoder head used in `DGCNN <https://arxiv.org/abs/1801.07829>`_.\n    Refer to the\n    `reimplementation code <https://github.com/AnTao97/dgcnn.pytorch>`_.\n\n    Args:\n        fp_channels (tuple[int], optional): Tuple of mlp channels in feature\n            propagation (FP) modules. Defaults to (1216, 512).\n    \"\"\"\n\n    def __init__(self, fp_channels=(1216, 512), **kwargs):\n        super(DGCNNHead, self).__init__(**kwargs)\n\n        self.FP_module = DGCNNFPModule(\n            mlp_channels=fp_channels, act_cfg=self.act_cfg)\n\n        # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L40\n        self.pre_seg_conv = ConvModule(\n            fp_channels[-1],\n            self.channels,\n            kernel_size=1,\n            bias=False,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            act_cfg=self.act_cfg)\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: points for decoder.\n        \"\"\"\n        fa_points = feat_dict['fa_points']\n\n        return fa_points\n\n    def forward(self, feat_dict):\n        \"\"\"Forward pass.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Segmentation map of shape [B, num_classes, N].\n        \"\"\"\n        fa_points = self._extract_input(feat_dict)\n\n        fp_points = self.FP_module(fa_points)\n        fp_points = fp_points.transpose(1, 2).contiguous()\n        output = self.pre_seg_conv(fp_points)\n        output = self.cls_seg(output)\n\n        return output\n"
  },
  {
    "path": "mmdet3d/models/decode_heads/paconv_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn.bricks import ConvModule\n\nfrom ..builder import HEADS\nfrom .pointnet2_head import PointNet2Head\n\n\n@HEADS.register_module()\nclass PAConvHead(PointNet2Head):\n    r\"\"\"PAConv decoder head.\n\n    Decoder head used in `PAConv <https://arxiv.org/abs/2103.14635>`_.\n    Refer to the `official code <https://github.com/CVMI-Lab/PAConv>`_.\n\n    Args:\n        fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.\n        fp_norm_cfg (dict): Config of norm layers used in FP modules.\n            Default: dict(type='BN2d').\n    \"\"\"\n\n    def __init__(self,\n                 fp_channels=((768, 256, 256), (384, 256, 256),\n                              (320, 256, 128), (128 + 6, 128, 128, 128)),\n                 fp_norm_cfg=dict(type='BN2d'),\n                 **kwargs):\n        super(PAConvHead, self).__init__(fp_channels, fp_norm_cfg, **kwargs)\n\n        # https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/pointnet2/pointnet2_paconv_seg.py#L53\n        # PointNet++'s decoder conv has bias while PAConv's doesn't have\n        # so we need to rebuild it here\n        self.pre_seg_conv = ConvModule(\n            fp_channels[-1][-1],\n            self.channels,\n            kernel_size=1,\n            bias=False,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            act_cfg=self.act_cfg)\n\n    def forward(self, feat_dict):\n        \"\"\"Forward pass.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Segmentation map of shape [B, num_classes, N].\n        \"\"\"\n        sa_xyz, sa_features = self._extract_input(feat_dict)\n\n        # PointNet++ doesn't use the first level of `sa_features` as input\n        # while PAConv inputs it through skip-connection\n        fp_feature = sa_features[-1]\n\n        for i in range(self.num_fp):\n            # consume the points in a bottom-up manner\n            fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)],\n                                            sa_features[-(i + 2)], fp_feature)\n\n        output = self.pre_seg_conv(fp_feature)\n        output = self.cls_seg(output)\n\n        return output\n"
  },
  {
    "path": "mmdet3d/models/decode_heads/pointnet2_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn.bricks import ConvModule\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import PointFPModule\nfrom ..builder import HEADS\nfrom .decode_head import Base3DDecodeHead\n\n\n@HEADS.register_module()\nclass PointNet2Head(Base3DDecodeHead):\n    r\"\"\"PointNet2 decoder head.\n\n    Decoder head used in `PointNet++ <https://arxiv.org/abs/1706.02413>`_.\n    Refer to the `official code <https://github.com/charlesq34/pointnet2>`_.\n\n    Args:\n        fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.\n        fp_norm_cfg (dict): Config of norm layers used in FP modules.\n            Default: dict(type='BN2d').\n    \"\"\"\n\n    def __init__(self,\n                 fp_channels=((768, 256, 256), (384, 256, 256),\n                              (320, 256, 128), (128, 128, 128, 128)),\n                 fp_norm_cfg=dict(type='BN2d'),\n                 **kwargs):\n        super(PointNet2Head, self).__init__(**kwargs)\n\n        self.num_fp = len(fp_channels)\n        self.FP_modules = nn.ModuleList()\n        for cur_fp_mlps in fp_channels:\n            self.FP_modules.append(\n                PointFPModule(mlp_channels=cur_fp_mlps, norm_cfg=fp_norm_cfg))\n\n        # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L40\n        self.pre_seg_conv = ConvModule(\n            fp_channels[-1][-1],\n            self.channels,\n            kernel_size=1,\n            bias=True,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            act_cfg=self.act_cfg)\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            list[torch.Tensor]: Coordinates of multiple levels of points.\n            list[torch.Tensor]: Features of multiple levels of points.\n        \"\"\"\n        sa_xyz = feat_dict['sa_xyz']\n        sa_features = feat_dict['sa_features']\n        assert len(sa_xyz) == len(sa_features)\n\n        return sa_xyz, sa_features\n\n    def forward(self, feat_dict):\n        \"\"\"Forward pass.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Segmentation map of shape [B, num_classes, N].\n        \"\"\"\n        sa_xyz, sa_features = self._extract_input(feat_dict)\n\n        # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L24\n        sa_features[0] = None\n\n        fp_feature = sa_features[-1]\n\n        for i in range(self.num_fp):\n            # consume the points in a bottom-up manner\n            fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)],\n                                            sa_features[-(i + 2)], fp_feature)\n        output = self.pre_seg_conv(fp_feature)\n        output = self.cls_seg(output)\n\n        return output\n"
  },
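The FP loop shared by PointNet2Head and PAConvHead consumes the SA pyramid bottom-up: at step i, features on level -(i + 1) are propagated onto the denser level -(i + 2). A tiny sketch of that index pattern for the default four FP modules over five SA levels (level 0 being the raw input points):

    num_fp, num_sa_levels = 4, 5
    for i in range(num_fp):
        source = num_sa_levels - (i + 1)
        target = num_sa_levels - (i + 2)
        print(f'FP[{i}]: propagate features from SA level {source} '
              f'onto denser SA level {target}')
    # FP[0]: 4 -> 3, FP[1]: 3 -> 2, FP[2]: 2 -> 1, FP[3]: 1 -> 0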
  {
    "path": "mmdet3d/models/dense_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .anchor3d_head import Anchor3DHead\nfrom .anchor_free_mono3d_head import AnchorFreeMono3DHead\nfrom .base_conv_bbox_head import BaseConvBboxHead\nfrom .base_mono3d_dense_head import BaseMono3DDenseHead\nfrom .centerpoint_head import CenterHead\nfrom .fcaf3d_head import FCAF3DHead\nfrom .fcos_mono3d_head import FCOSMono3DHead\nfrom .free_anchor3d_head import FreeAnchor3DHead\nfrom .groupfree3d_head import GroupFree3DHead\nfrom .monoflex_head import MonoFlexHead\nfrom .parta2_rpn_head import PartA2RPNHead\nfrom .pgd_head import PGDHead\nfrom .point_rpn_head import PointRPNHead\nfrom .shape_aware_head import ShapeAwareHead\nfrom .smoke_mono3d_head import SMOKEMono3DHead\nfrom .ssd_3d_head import SSD3DHead\nfrom .vote_head import VoteHead\n\n__all__ = [\n    'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',\n    'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',\n    'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',\n    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',\n    'MonoFlexHead', 'FCAF3DHead'\n]\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/anchor3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch import nn as nn\n\nfrom mmdet3d.core import (PseudoSampler, box3d_multiclass_nms, limit_period,\n                          xywhr2xyxyr)\nfrom mmdet.core import (build_assigner, build_bbox_coder,\n                        build_prior_generator, build_sampler, multi_apply)\nfrom ..builder import HEADS, build_loss\nfrom .train_mixins import AnchorTrainMixin\n\n\n@HEADS.register_module()\nclass Anchor3DHead(BaseModule, AnchorTrainMixin):\n    \"\"\"Anchor head for SECOND/PointPillars/MVXNet/PartA2.\n\n    Args:\n        num_classes (int): Number of classes.\n        in_channels (int): Number of channels in the input feature map.\n        train_cfg (dict): Train configs.\n        test_cfg (dict): Test configs.\n        feat_channels (int): Number of channels of the feature map.\n        use_direction_classifier (bool): Whether to add a direction classifier.\n        anchor_generator(dict): Config dict of anchor generator.\n        assigner_per_size (bool): Whether to do assignment for each separate\n            anchor size.\n        assign_per_class (bool): Whether to do assignment for each class.\n        diff_rad_by_sin (bool): Whether to change the difference into sin\n            difference for box regression loss.\n        dir_offset (float | int): The offset of BEV rotation angles.\n            (TODO: may be moved into box coder)\n        dir_limit_offset (float | int): The limited range of BEV\n            rotation angles. (TODO: may be moved into box coder)\n        bbox_coder (dict): Config dict of box coders.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_dir (dict): Config of direction classifier loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 train_cfg,\n                 test_cfg,\n                 feat_channels=256,\n                 use_direction_classifier=True,\n                 anchor_generator=dict(\n                     type='Anchor3DRangeGenerator',\n                     range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],\n                     strides=[2],\n                     sizes=[[3.9, 1.6, 1.56]],\n                     rotations=[0, 1.57],\n                     custom_values=[],\n                     reshape_out=False),\n                 assigner_per_size=False,\n                 assign_per_class=False,\n                 diff_rad_by_sin=True,\n                 dir_offset=-np.pi / 2,\n                 dir_limit_offset=0,\n                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.feat_channels = feat_channels\n        self.diff_rad_by_sin = diff_rad_by_sin\n        self.use_direction_classifier = use_direction_classifier\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.assigner_per_size = assigner_per_size\n        
self.assign_per_class = assign_per_class\n        self.dir_offset = dir_offset\n        self.dir_limit_offset = dir_limit_offset\n        import warnings\n        warnings.warn(\n            'dir_offset and dir_limit_offset will be depressed and be '\n            'incorporated into box coder in the future')\n        self.fp16_enabled = False\n\n        # build anchor generator\n        self.anchor_generator = build_prior_generator(anchor_generator)\n        # In 3D detection, the anchor stride is connected with anchor size\n        self.num_anchors = self.anchor_generator.num_base_anchors\n        # build box coder\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.box_code_size = self.bbox_coder.code_size\n\n        # build loss function\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']\n        if not self.use_sigmoid_cls:\n            self.num_classes += 1\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_dir = build_loss(loss_dir)\n        self.fp16_enabled = False\n\n        self._init_layers()\n        self._init_assigner_sampler()\n\n        if init_cfg is None:\n            self.init_cfg = dict(\n                type='Normal',\n                layer='Conv2d',\n                std=0.01,\n                override=dict(\n                    type='Normal', name='conv_cls', std=0.01, bias_prob=0.01))\n\n    def _init_assigner_sampler(self):\n        \"\"\"Initialize the target assigner and sampler of the head.\"\"\"\n        if self.train_cfg is None:\n            return\n\n        if self.sampling:\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\n        else:\n            self.bbox_sampler = PseudoSampler()\n        if isinstance(self.train_cfg.assigner, dict):\n            self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n        elif isinstance(self.train_cfg.assigner, list):\n            self.bbox_assigner = [\n                build_assigner(res) for res in self.train_cfg.assigner\n            ]\n\n    def _init_layers(self):\n        \"\"\"Initialize neural network layers of the head.\"\"\"\n        self.cls_out_channels = self.num_anchors * self.num_classes\n        self.conv_cls = nn.Conv2d(self.feat_channels, self.cls_out_channels, 1)\n        self.conv_reg = nn.Conv2d(self.feat_channels,\n                                  self.num_anchors * self.box_code_size, 1)\n        if self.use_direction_classifier:\n            self.conv_dir_cls = nn.Conv2d(self.feat_channels,\n                                          self.num_anchors * 2, 1)\n\n    def forward_single(self, x):\n        \"\"\"Forward function on a single-scale feature map.\n\n        Args:\n            x (torch.Tensor): Input features.\n\n        Returns:\n            tuple[torch.Tensor]: Contain score of each class, bbox\n                regression and direction classification predictions.\n        \"\"\"\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        dir_cls_preds = None\n        if self.use_direction_classifier:\n            dir_cls_preds = self.conv_dir_cls(x)\n        return cls_score, bbox_pred, dir_cls_preds\n\n    def forward(self, feats):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\n                features produced by FPN.\n\n        Returns:\n            tuple[list[torch.Tensor]]: Multi-level class 
score, bbox\n                and direction predictions.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def get_anchors(self, featmap_sizes, input_metas, device='cuda'):\n        \"\"\"Get anchors according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            input_metas (list[dict]): contain pcd and img's meta info.\n            device (str): device of current module.\n\n        Returns:\n            list[list[torch.Tensor]]: Anchors of each image, valid flags\n                of each image.\n        \"\"\"\n        num_imgs = len(input_metas)\n        # since feature map sizes of all images are the same, we only compute\n        # anchors for one time\n        multi_level_anchors = self.anchor_generator.grid_anchors(\n            featmap_sizes, device=device)\n        anchor_list = [multi_level_anchors for _ in range(num_imgs)]\n        return anchor_list\n\n    def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,\n                    label_weights, bbox_targets, bbox_weights, dir_targets,\n                    dir_weights, num_total_samples):\n        \"\"\"Calculate loss of Single-level results.\n\n        Args:\n            cls_score (torch.Tensor): Class score in single-level.\n            bbox_pred (torch.Tensor): Bbox prediction in single-level.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single-level.\n            labels (torch.Tensor): Labels of class.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_targets (torch.Tensor): Targets of bbox predictions.\n            bbox_weights (torch.Tensor): Weights of bbox loss.\n            dir_targets (torch.Tensor): Targets of direction predictions.\n            dir_weights (torch.Tensor): Weights of direction loss.\n            num_total_samples (int): The number of valid samples.\n\n        Returns:\n            tuple[torch.Tensor]: Losses of class, bbox\n                and direction, respectively.\n        \"\"\"\n        # classification loss\n        if num_total_samples is None:\n            num_total_samples = int(cls_score.shape[0])\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.permute(0, 2, 3, 1).reshape(-1, self.num_classes)\n        assert labels.max().item() <= self.num_classes\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n\n        # regression loss\n        bbox_pred = bbox_pred.permute(0, 2, 3,\n                                      1).reshape(-1, self.box_code_size)\n        bbox_targets = bbox_targets.reshape(-1, self.box_code_size)\n        bbox_weights = bbox_weights.reshape(-1, self.box_code_size)\n\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    & (labels < bg_class_ind)).nonzero(\n                        as_tuple=False).reshape(-1)\n        num_pos = len(pos_inds)\n\n        pos_bbox_pred = bbox_pred[pos_inds]\n        pos_bbox_targets = bbox_targets[pos_inds]\n        pos_bbox_weights = bbox_weights[pos_inds]\n\n        # dir loss\n        if self.use_direction_classifier:\n            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).reshape(-1, 2)\n            dir_targets = dir_targets.reshape(-1)\n            dir_weights = dir_weights.reshape(-1)\n            pos_dir_cls_preds = dir_cls_preds[pos_inds]\n            pos_dir_targets 
= dir_targets[pos_inds]\n            pos_dir_weights = dir_weights[pos_inds]\n\n        if num_pos > 0:\n            code_weight = self.train_cfg.get('code_weight', None)\n            if code_weight:\n                pos_bbox_weights = pos_bbox_weights * bbox_weights.new_tensor(\n                    code_weight)\n            if self.diff_rad_by_sin:\n                pos_bbox_pred, pos_bbox_targets = self.add_sin_difference(\n                    pos_bbox_pred, pos_bbox_targets)\n            loss_bbox = self.loss_bbox(\n                pos_bbox_pred,\n                pos_bbox_targets,\n                pos_bbox_weights,\n                avg_factor=num_total_samples)\n\n            # direction classification loss\n            loss_dir = None\n            if self.use_direction_classifier:\n                loss_dir = self.loss_dir(\n                    pos_dir_cls_preds,\n                    pos_dir_targets,\n                    pos_dir_weights,\n                    avg_factor=num_total_samples)\n        else:\n            loss_bbox = pos_bbox_pred.sum()\n            if self.use_direction_classifier:\n                loss_dir = pos_dir_cls_preds.sum()\n\n        return loss_cls, loss_bbox, loss_dir\n\n    @staticmethod\n    def add_sin_difference(boxes1, boxes2):\n        \"\"\"Convert the rotation difference to difference in sine function.\n\n        Args:\n            boxes1 (torch.Tensor): Original Boxes in shape (NxC), where C>=7\n                and the 7th dimension is rotation dimension.\n            boxes2 (torch.Tensor): Target boxes in shape (NxC), where C>=7 and\n                the 7th dimension is rotation dimension.\n\n        Returns:\n            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th\n                dimensions are changed.\n        \"\"\"\n        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(\n            boxes2[..., 6:7])\n        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(boxes2[...,\n                                                                         6:7])\n        boxes1 = torch.cat(\n            [boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)\n        boxes2 = torch.cat([boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]],\n                           dim=-1)\n        return boxes1, boxes2\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate losses.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes\n                of each sample.\n            gt_labels (list[torch.Tensor]): Gt labels of each sample.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding boxes to ignore.\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Classification, bbox, and\n                direction losses of each level.\n\n                - loss_cls (list[torch.Tensor]): Classification losses.\n                - loss_bbox (list[torch.Tensor]): Box regression losses.\n    
            - loss_dir (list[torch.Tensor]): Direction classification\n                    losses.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.anchor_generator.num_levels\n        device = cls_scores[0].device\n        anchor_list = self.get_anchors(\n            featmap_sizes, input_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.anchor_target_3d(\n            anchor_list,\n            gt_bboxes,\n            input_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            num_classes=self.num_classes,\n            label_channels=label_channels,\n            sampling=self.sampling)\n\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         dir_targets_list, dir_weights_list, num_total_pos,\n         num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # num_total_samples = None\n        losses_cls, losses_bbox, losses_dir = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            dir_cls_preds,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            dir_targets_list,\n            dir_weights_list,\n            num_total_samples=num_total_samples)\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dir=losses_dir)\n\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   input_metas,\n                   cfg=None,\n                   rescale=False):\n        \"\"\"Get bboxes of anchor head.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            cfg (:obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor]): Whether th rescale bbox.\n\n        Returns:\n            list[tuple]: Prediction resultes of batches.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n        assert len(cls_scores) == len(dir_cls_preds)\n        num_levels = len(cls_scores)\n        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]\n        device = cls_scores[0].device\n        mlvl_anchors = self.anchor_generator.grid_anchors(\n            featmap_sizes, device=device)\n        mlvl_anchors = [\n            anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors\n        ]\n\n        result_list = []\n        for img_id in range(len(input_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            dir_cls_pred_list = [\n                dir_cls_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n\n            input_meta = input_metas[img_id]\n            
proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,\n                                               dir_cls_pred_list, mlvl_anchors,\n                                               input_meta, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          dir_cls_preds,\n                          mlvl_anchors,\n                          input_meta,\n                          cfg=None,\n                          rescale=False):\n        \"\"\"Get bboxes of single branch.\n\n        Args:\n            cls_scores (torch.Tensor): Class score in single batch.\n            bbox_preds (torch.Tensor): Bbox prediction in single batch.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single batch.\n            mlvl_anchors (List[torch.Tensor]): Multi-level anchors\n                in single batch.\n            input_meta (list[dict]): Contain pcd and img's meta info.\n            cfg (:obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor]): whether th rescale bbox.\n\n        Returns:\n            tuple: Contain predictions of single batch.\n\n                - bboxes (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores (torch.Tensor): Class score of each bbox.\n                - labels (torch.Tensor): Label of each bbox.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_dir_scores = []\n        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(\n                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]\n            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.num_classes)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2,\n                                          0).reshape(-1, self.box_code_size)\n\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    max_scores, _ = scores[:, :-1].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                dir_cls_score = dir_cls_score[topk_inds]\n\n            bboxes = self.bbox_coder.decode(anchors, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_dir_scores.append(dir_cls_score)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.box_code_size).bev)\n        mlvl_scores = torch.cat(mlvl_scores)\n        mlvl_dir_scores = 
torch.cat(mlvl_dir_scores)\n\n        if self.use_sigmoid_cls:\n            # Append a dummy background class column at the end when using sigmoid\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n\n        score_thr = cfg.get('score_thr', 0)\n        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                       mlvl_scores, score_thr, cfg.max_num,\n                                       cfg, mlvl_dir_scores)\n        bboxes, scores, labels, dir_scores = results\n        if bboxes.shape[0] > 0:\n            dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,\n                                   self.dir_limit_offset, np.pi)\n            bboxes[..., 6] = (\n                dir_rot + self.dir_offset +\n                np.pi * dir_scores.to(bboxes.dtype))\n        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)\n        return bboxes, scores, labels\n"
  },
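The sine-difference trick in Anchor3DHead.add_sin_difference() uses the identity sin(ra - rt) = sin(ra)cos(rt) - cos(ra)sin(rt): encoding the predicted rotation as sin(ra)cos(rt) and the target rotation as cos(ra)sin(rt) makes their difference exactly sin(ra - rt), so the regression loss is periodic in the rotation error while the direction classifier recovers the remaining half-turn. A quick numerical check of the identity:

    import torch

    ra = torch.tensor(2.8)    # predicted rotation
    rt = torch.tensor(-0.4)   # target rotation

    encoded_pred = torch.sin(ra) * torch.cos(rt)   # replaces boxes1[..., 6:7]
    encoded_tgt = torch.cos(ra) * torch.sin(rt)    # replaces boxes2[..., 6:7]
    assert torch.allclose(encoded_pred - encoded_tgt, torch.sin(ra - rt))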
  {
    "path": "mmdet3d/models/dense_heads/anchor_free_mono3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import abstractmethod\n\nimport torch\nfrom mmcv.cnn import ConvModule, bias_init_with_prob, normal_init\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\n\nfrom mmdet.core import multi_apply\nfrom ..builder import HEADS, build_loss\nfrom .base_mono3d_dense_head import BaseMono3DDenseHead\n\n\n@HEADS.register_module()\nclass AnchorFreeMono3DHead(BaseMono3DDenseHead):\n    \"\"\"Anchor-free head for monocular 3D object detection.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int, optional): Number of hidden channels.\n            Used in child classes. Defaults to 256.\n        stacked_convs (int, optional): Number of stacking convs of the head.\n        strides (tuple, optional): Downsample factor of each feature map.\n        dcn_on_last_conv (bool, optional): If true, use dcn in the last\n            layer of towers. Default: False.\n        conv_bias (bool | str, optional): If specified as `auto`, it will be\n            decided by the norm_cfg. Bias of conv will be set as True\n            if `norm_cfg` is None, otherwise False. Default: 'auto'.\n        background_label (int, optional): Label ID of background,\n            set as 0 for RPN and num_classes for other heads.\n            It will automatically set as `num_classes` if None is given.\n        use_direction_classifier (bool, optional):\n            Whether to add a direction classifier.\n        diff_rad_by_sin (bool, optional): Whether to change the difference\n            into sin difference for box regression loss. Defaults to True.\n        dir_offset (float, optional): Parameter used in direction\n            classification. Defaults to 0.\n        dir_limit_offset (float, optional): Parameter used in direction\n            classification. Defaults to 0.\n        loss_cls (dict, optional): Config of classification loss.\n        loss_bbox (dict, optional): Config of localization loss.\n        loss_dir (dict, optional): Config of direction classifier loss.\n        loss_attr (dict, optional): Config of attribute classifier loss,\n            which is only active when `pred_attrs=True`.\n        bbox_code_size (int, optional): Dimensions of predicted bounding boxes.\n        pred_attrs (bool, optional): Whether to predict attributes.\n            Defaults to False.\n        num_attrs (int, optional): The number of attributes to be predicted.\n            Default: 9.\n        pred_velo (bool, optional): Whether to predict velocity.\n            Defaults to False.\n        pred_bbox2d (bool, optional): Whether to predict 2D boxes.\n            Defaults to False.\n        group_reg_dims (tuple[int], optional): The dimension of each regression\n            target group. Default: (2, 1, 3, 1, 2).\n        cls_branch (tuple[int], optional): Channels for classification branch.\n            Default: (128, 64).\n        reg_branch (tuple[tuple], optional): Channels for regression branch.\n            Default: (\n                (128, 64),  # offset\n                (128, 64),  # depth\n                (64, ),  # size\n                (64, ),  # rot\n                ()  # velo\n            ),\n        dir_branch (tuple[int], optional): Channels for direction\n            classification branch. 
Default: (64, ).\n        attr_branch (tuple[int], optional): Channels for classification branch.\n            Default: (64, ).\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Default: None.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: None.\n        train_cfg (dict, optional): Training config of anchor head.\n        test_cfg (dict, optional): Testing config of anchor head.\n    \"\"\"  # noqa: W605\n\n    _version = 1\n\n    def __init__(\n            self,\n            num_classes,\n            in_channels,\n            feat_channels=256,\n            stacked_convs=4,\n            strides=(4, 8, 16, 32, 64),\n            dcn_on_last_conv=False,\n            conv_bias='auto',\n            background_label=None,\n            use_direction_classifier=True,\n            diff_rad_by_sin=True,\n            dir_offset=0,\n            dir_limit_offset=0,\n            loss_cls=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0),\n            loss_bbox=dict(\n                type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n            loss_dir=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_attr=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            bbox_code_size=9,  # For nuscenes\n            pred_attrs=False,\n            num_attrs=9,  # For nuscenes\n            pred_velo=False,\n            pred_bbox2d=False,\n            group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo,\n            cls_branch=(128, 64),\n            reg_branch=(\n                (128, 64),  # offset\n                (128, 64),  # depth\n                (64, ),  # size\n                (64, ),  # rot\n                ()  # velo\n            ),\n            dir_branch=(64, ),\n            attr_branch=(64, ),\n            conv_cfg=None,\n            norm_cfg=None,\n            train_cfg=None,\n            test_cfg=None,\n            init_cfg=None):\n        super(AnchorFreeMono3DHead, self).__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.cls_out_channels = num_classes\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.strides = strides\n        self.dcn_on_last_conv = dcn_on_last_conv\n        assert conv_bias == 'auto' or isinstance(conv_bias, bool)\n        self.conv_bias = conv_bias\n        self.use_direction_classifier = use_direction_classifier\n        self.diff_rad_by_sin = diff_rad_by_sin\n        self.dir_offset = dir_offset\n        self.dir_limit_offset = dir_limit_offset\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_dir = build_loss(loss_dir)\n        self.bbox_code_size = bbox_code_size\n        self.group_reg_dims = list(group_reg_dims)\n        self.cls_branch = cls_branch\n        self.reg_branch = reg_branch\n        assert len(reg_branch) == len(group_reg_dims), 'The number of '\\\n            'element in reg_branch and group_reg_dims should be the same.'\n        self.pred_velo = pred_velo\n        self.pred_bbox2d = pred_bbox2d\n        self.out_channels = []\n        for reg_branch_channels in reg_branch:\n            if len(reg_branch_channels) > 0:\n                
self.out_channels.append(reg_branch_channels[-1])\n            else:\n                self.out_channels.append(-1)\n        self.dir_branch = dir_branch\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.fp16_enabled = False\n        self.background_label = (\n            num_classes if background_label is None else background_label)\n        # background_label should be either 0 or num_classes\n        assert (self.background_label == 0\n                or self.background_label == num_classes)\n        self.pred_attrs = pred_attrs\n        self.attr_background_label = -1\n        self.num_attrs = num_attrs\n        if self.pred_attrs:\n            self.attr_background_label = num_attrs\n            self.loss_attr = build_loss(loss_attr)\n            self.attr_branch = attr_branch\n\n        self._init_layers()\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self._init_cls_convs()\n        self._init_reg_convs()\n        self._init_predictor()\n\n    def _init_cls_convs(self):\n        \"\"\"Initialize classification conv layers of the head.\"\"\"\n        self.cls_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            if self.dcn_on_last_conv and i == self.stacked_convs - 1:\n                conv_cfg = dict(type='DCNv2')\n            else:\n                conv_cfg = self.conv_cfg\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.conv_bias))\n\n    def _init_reg_convs(self):\n        \"\"\"Initialize bbox regression conv layers of the head.\"\"\"\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            if self.dcn_on_last_conv and i == self.stacked_convs - 1:\n                conv_cfg = dict(type='DCNv2')\n            else:\n                conv_cfg = self.conv_cfg\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.conv_bias))\n\n    def _init_branch(self, conv_channels=(64), conv_strides=(1)):\n        \"\"\"Initialize conv layers as a prediction branch.\"\"\"\n        conv_before_pred = nn.ModuleList()\n        if isinstance(conv_channels, int):\n            conv_channels = [self.feat_channels] + [conv_channels]\n            conv_strides = [conv_strides]\n        else:\n            conv_channels = [self.feat_channels] + list(conv_channels)\n            conv_strides = list(conv_strides)\n        for i in range(len(conv_strides)):\n            conv_before_pred.append(\n                ConvModule(\n                    conv_channels[i],\n                    conv_channels[i + 1],\n                    3,\n                    stride=conv_strides[i],\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                  
  bias=self.conv_bias))\n\n        return conv_before_pred\n\n    def _init_predictor(self):\n        \"\"\"Initialize predictor layers of the head.\"\"\"\n        self.conv_cls_prev = self._init_branch(\n            conv_channels=self.cls_branch,\n            conv_strides=(1, ) * len(self.cls_branch))\n        self.conv_cls = nn.Conv2d(self.cls_branch[-1], self.cls_out_channels,\n                                  1)\n        self.conv_reg_prevs = nn.ModuleList()\n        self.conv_regs = nn.ModuleList()\n        for i in range(len(self.group_reg_dims)):\n            reg_dim = self.group_reg_dims[i]\n            reg_branch_channels = self.reg_branch[i]\n            out_channel = self.out_channels[i]\n            if len(reg_branch_channels) > 0:\n                self.conv_reg_prevs.append(\n                    self._init_branch(\n                        conv_channels=reg_branch_channels,\n                        conv_strides=(1, ) * len(reg_branch_channels)))\n                self.conv_regs.append(nn.Conv2d(out_channel, reg_dim, 1))\n            else:\n                self.conv_reg_prevs.append(None)\n                self.conv_regs.append(\n                    nn.Conv2d(self.feat_channels, reg_dim, 1))\n        if self.use_direction_classifier:\n            self.conv_dir_cls_prev = self._init_branch(\n                conv_channels=self.dir_branch,\n                conv_strides=(1, ) * len(self.dir_branch))\n            self.conv_dir_cls = nn.Conv2d(self.dir_branch[-1], 2, 1)\n        if self.pred_attrs:\n            self.conv_attr_prev = self._init_branch(\n                conv_channels=self.attr_branch,\n                conv_strides=(1, ) * len(self.attr_branch))\n            self.conv_attr = nn.Conv2d(self.attr_branch[-1], self.num_attrs, 1)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\n\n        We currently still use the customized defined init_weights because the\n        default init of DCN triggered by the init_cfg will init\n        conv_offset.weight, which mistakenly affects the training stability.\n        \"\"\"\n        for modules in [self.cls_convs, self.reg_convs, self.conv_cls_prev]:\n            for m in modules:\n                if isinstance(m.conv, nn.Conv2d):\n                    normal_init(m.conv, std=0.01)\n        for conv_reg_prev in self.conv_reg_prevs:\n            if conv_reg_prev is None:\n                continue\n            for m in conv_reg_prev:\n                if isinstance(m.conv, nn.Conv2d):\n                    normal_init(m.conv, std=0.01)\n        if self.use_direction_classifier:\n            for m in self.conv_dir_cls_prev:\n                if isinstance(m.conv, nn.Conv2d):\n                    normal_init(m.conv, std=0.01)\n        if self.pred_attrs:\n            for m in self.conv_attr_prev:\n                if isinstance(m.conv, nn.Conv2d):\n                    normal_init(m.conv, std=0.01)\n        bias_cls = bias_init_with_prob(0.01)\n        normal_init(self.conv_cls, std=0.01, bias=bias_cls)\n        for conv_reg in self.conv_regs:\n            normal_init(conv_reg, std=0.01)\n        if self.use_direction_classifier:\n            normal_init(self.conv_dir_cls, std=0.01, bias=bias_cls)\n        if self.pred_attrs:\n            normal_init(self.conv_attr, std=0.01, bias=bias_cls)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        
Returns:\n            tuple: Usually contain classification scores, bbox predictions,\n                and direction class predictions.\n                cls_scores (list[Tensor]): Box scores for each scale level,\n                    each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * bbox_code_size.\n                dir_cls_preds (list[Tensor]): Box scores for direction class\n                    predictions on each scale level, each is a 4D-tensor,\n                    the channel number is num_points * 2. (bin = 2)\n                attr_preds (list[Tensor]): Attribute scores for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * num_attrs.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)[:5]\n\n    def forward_single(self, x):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n\n        Returns:\n            tuple: Scores for each class, bbox predictions, direction class,\n                and attributes, features after classification and regression\n                conv layers, some models needs these features like FCOS.\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n\n        for cls_layer in self.cls_convs:\n            cls_feat = cls_layer(cls_feat)\n        # clone the cls_feat for reusing the feature map afterwards\n        clone_cls_feat = cls_feat.clone()\n        for conv_cls_prev_layer in self.conv_cls_prev:\n            clone_cls_feat = conv_cls_prev_layer(clone_cls_feat)\n        cls_score = self.conv_cls(clone_cls_feat)\n\n        for reg_layer in self.reg_convs:\n            reg_feat = reg_layer(reg_feat)\n        bbox_pred = []\n        for i in range(len(self.group_reg_dims)):\n            # clone the reg_feat for reusing the feature map afterwards\n            clone_reg_feat = reg_feat.clone()\n            if len(self.reg_branch[i]) > 0:\n                for conv_reg_prev_layer in self.conv_reg_prevs[i]:\n                    clone_reg_feat = conv_reg_prev_layer(clone_reg_feat)\n            bbox_pred.append(self.conv_regs[i](clone_reg_feat))\n        bbox_pred = torch.cat(bbox_pred, dim=1)\n\n        dir_cls_pred = None\n        if self.use_direction_classifier:\n            clone_reg_feat = reg_feat.clone()\n            for conv_dir_cls_prev_layer in self.conv_dir_cls_prev:\n                clone_reg_feat = conv_dir_cls_prev_layer(clone_reg_feat)\n            dir_cls_pred = self.conv_dir_cls(clone_reg_feat)\n\n        attr_pred = None\n        if self.pred_attrs:\n            # clone the cls_feat for reusing the feature map afterwards\n            clone_cls_feat = cls_feat.clone()\n            for conv_attr_prev_layer in self.conv_attr_prev:\n                clone_cls_feat = conv_attr_prev_layer(clone_cls_feat)\n            attr_pred = self.conv_attr(clone_cls_feat)\n\n        return cls_score, bbox_pred, dir_cls_pred, attr_pred, cls_feat, \\\n            reg_feat\n\n    @abstractmethod\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             attr_preds,\n             gt_bboxes,\n             gt_labels,\n             gt_bboxes_3d,\n  
           gt_labels_3d,\n             centers2d,\n             depths,\n             attr_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * bbox_code_size.\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. (bin = 2)\n            attr_preds (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_attrs.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_3d (list[Tensor]): 3D Ground truth bboxes for each\n                image with shape (num_gts, bbox_code_size).\n            gt_labels_3d (list[Tensor]): 3D class indices of each box.\n            centers2d (list[Tensor]): Projected 3D centers onto 2D images.\n            depths (list[Tensor]): Depth of projected centers on 2D images.\n            attr_labels (list[Tensor], optional): Attribute indices\n                corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n        \"\"\"\n\n        raise NotImplementedError\n\n    @abstractmethod\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   attr_preds,\n                   img_metas,\n                   cfg=None,\n                   rescale=None):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_points * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_points * bbox_code_size, H, W)\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. 
(bin = 2)\n            attr_preds (list[Tensor]): Attribute scores for each scale level\n                Has shape (N, num_points * num_attrs, H, W)\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used\n            rescale (bool): If True, return boxes in original image space\n        \"\"\"\n\n        raise NotImplementedError\n\n    @abstractmethod\n    def get_targets(self, points, gt_bboxes_list, gt_labels_list,\n                    gt_bboxes_3d_list, gt_labels_3d_list, centers2d_list,\n                    depths_list, attr_labels_list):\n        \"\"\"Compute regression, classification and centerss targets for points\n        in multiple images.\n\n        Args:\n            points (list[Tensor]): Points of each fpn level, each has shape\n                (num_points, 2).\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n            gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each\n                image, each has shape (num_gt, bbox_code_size).\n            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each\n                box, each has shape (num_gt,).\n            centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,\n                each has shape (num_gt, 2).\n            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D\n                image, each has shape (num_gt, 1).\n            attr_labels_list (list[Tensor]): Attribute labels of each box,\n                each has shape (num_gt,).\n        \"\"\"\n        raise NotImplementedError\n\n    def _get_points_single(self,\n                           featmap_size,\n                           stride,\n                           dtype,\n                           device,\n                           flatten=False):\n        \"\"\"Get points of a single scale level.\"\"\"\n        h, w = featmap_size\n        x_range = torch.arange(w, dtype=dtype, device=device)\n        y_range = torch.arange(h, dtype=dtype, device=device)\n        y, x = torch.meshgrid(y_range, x_range)\n        if flatten:\n            y = y.flatten()\n            x = x.flatten()\n        return y, x\n\n    def get_points(self, featmap_sizes, dtype, device, flatten=False):\n        \"\"\"Get points according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            dtype (torch.dtype): Type of points.\n            device (torch.device): Device of points.\n\n        Returns:\n            tuple: points of each image.\n        \"\"\"\n        mlvl_points = []\n        for i in range(len(featmap_sizes)):\n            mlvl_points.append(\n                self._get_points_single(featmap_sizes[i], self.strides[i],\n                                        dtype, device, flatten))\n        return mlvl_points\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/base_conv_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule\nfrom mmcv.cnn.bricks import build_conv_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom ..builder import HEADS\n\n\n@HEADS.register_module()\nclass BaseConvBboxHead(BaseModule):\n    r\"\"\"More general bbox head, with shared conv layers and two optional\n    separated branches.\n\n    .. code-block:: none\n\n                     /-> cls convs -> cls_score\n        shared convs\n                     \\-> reg convs -> bbox_pred\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=0,\n                 shared_conv_channels=(),\n                 cls_conv_channels=(),\n                 num_cls_out_channels=0,\n                 reg_conv_channels=(),\n                 num_reg_out_channels=0,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 bias='auto',\n                 init_cfg=None,\n                 *args,\n                 **kwargs):\n        super(BaseConvBboxHead, self).__init__(\n            init_cfg=init_cfg, *args, **kwargs)\n        assert in_channels > 0\n        assert num_cls_out_channels > 0\n        assert num_reg_out_channels > 0\n        self.in_channels = in_channels\n        self.shared_conv_channels = shared_conv_channels\n        self.cls_conv_channels = cls_conv_channels\n        self.num_cls_out_channels = num_cls_out_channels\n        self.reg_conv_channels = reg_conv_channels\n        self.num_reg_out_channels = num_reg_out_channels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.bias = bias\n\n        # add shared convs\n        if len(self.shared_conv_channels) > 0:\n            self.shared_convs = self._add_conv_branch(\n                self.in_channels, self.shared_conv_channels)\n            out_channels = self.shared_conv_channels[-1]\n        else:\n            out_channels = self.in_channels\n\n        # add cls specific branch\n        prev_channel = out_channels\n        if len(self.cls_conv_channels) > 0:\n            self.cls_convs = self._add_conv_branch(prev_channel,\n                                                   self.cls_conv_channels)\n            prev_channel = self.cls_conv_channels[-1]\n\n        self.conv_cls = build_conv_layer(\n            conv_cfg,\n            in_channels=prev_channel,\n            out_channels=num_cls_out_channels,\n            kernel_size=1)\n        # add reg specific branch\n        prev_channel = out_channels\n        if len(self.reg_conv_channels) > 0:\n            self.reg_convs = self._add_conv_branch(prev_channel,\n                                                   self.reg_conv_channels)\n            prev_channel = self.reg_conv_channels[-1]\n\n        self.conv_reg = build_conv_layer(\n            conv_cfg,\n            in_channels=prev_channel,\n            out_channels=num_reg_out_channels,\n            kernel_size=1)\n\n    def _add_conv_branch(self, in_channels, conv_channels):\n        \"\"\"Add shared or separable branch.\"\"\"\n        conv_spec = [in_channels] + list(conv_channels)\n        # add branch specific conv layers\n        conv_layers = nn.Sequential()\n        for i in range(len(conv_spec) - 1):\n            conv_layers.add_module(\n                f'layer{i}',\n                ConvModule(\n                    conv_spec[i],\n                    conv_spec[i + 1],\n                    
kernel_size=1,\n                    padding=0,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    act_cfg=self.act_cfg,\n                    bias=self.bias,\n                    inplace=True))\n        return conv_layers\n\n    def forward(self, feats):\n        \"\"\"Forward.\n\n        Args:\n            feats (Tensor): Input features\n\n        Returns:\n            Tensor: Class scores predictions\n            Tensor: Regression predictions\n        \"\"\"\n        # shared part\n        if len(self.shared_conv_channels) > 0:\n            x = self.shared_convs(feats)\n        else:\n            # no shared conv layers configured; use the input features as-is\n            x = feats\n\n        # separate branches\n        x_cls = x\n        x_reg = x\n\n        if len(self.cls_conv_channels) > 0:\n            x_cls = self.cls_convs(x_cls)\n        cls_score = self.conv_cls(x_cls)\n\n        if len(self.reg_conv_channels) > 0:\n            x_reg = self.reg_convs(x_reg)\n        bbox_pred = self.conv_reg(x_reg)\n\n        return cls_score, bbox_pred\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/base_mono3d_dense_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.runner import BaseModule\n\n\nclass BaseMono3DDenseHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for Monocular 3D DenseHeads.\"\"\"\n\n    def __init__(self, init_cfg=None):\n        super(BaseMono3DDenseHead, self).__init__(init_cfg=init_cfg)\n\n    @abstractmethod\n    def loss(self, **kwargs):\n        \"\"\"Compute losses of the head.\"\"\"\n        pass\n\n    @abstractmethod\n    def get_bboxes(self, **kwargs):\n        \"\"\"Transform network output for a batch into bbox predictions.\"\"\"\n        pass\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      centers2d=None,\n                      depths=None,\n                      attr_labels=None,\n                      gt_bboxes_ignore=None,\n                      proposal_cfg=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            x (list[Tensor]): Features from FPN.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (list[Tensor]): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each box,\n                shape (num_gts,).\n            gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,\n                shape (num_gts, self.bbox_code_size).\n            gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,\n                shape (num_gts,).\n            centers2d (list[Tensor]): Projected 3D center of each box,\n                shape (num_gts, 2).\n            depths (list[Tensor]): Depth of projected 3D center of each box,\n                shape (num_gts,).\n            attr_labels (list[Tensor]): Attribute labels of each box,\n                shape (num_gts,).\n            gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            proposal_cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used\n\n        Returns:\n            tuple:\n                losses: (dict[str, Tensor]): A dictionary of loss components.\n                proposal_list (list[Tensor]): Proposals of each image.\n        \"\"\"\n        outs = self(x)\n        if gt_labels is None:\n            loss_inputs = outs + (gt_bboxes, gt_bboxes_3d, centers2d, depths,\n                                  attr_labels, img_metas)\n        else:\n            loss_inputs = outs + (gt_bboxes, gt_labels, gt_bboxes_3d,\n                                  gt_labels_3d, centers2d, depths, attr_labels,\n                                  img_metas)\n        losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        if proposal_cfg is None:\n            return losses\n        else:\n            proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg)\n            return losses, proposal_list\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/centerpoint_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch\nfrom mmcv.cnn import ConvModule, build_conv_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn\n\nfrom mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,\n                          xywhr2xyxyr)\nfrom mmdet3d.core.post_processing import nms_bev\nfrom mmdet3d.models import builder\nfrom mmdet3d.models.utils import clip_sigmoid\nfrom mmdet.core import build_bbox_coder, multi_apply, reduce_mean\nfrom ..builder import HEADS, build_loss\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmcv.runner import BaseModule, force_fp32\n\n@HEADS.register_module()\nclass SeparateHead(BaseModule):\n    \"\"\"SeparateHead for CenterHead.\n\n    Args:\n        in_channels (int): Input channels for conv_layer.\n        heads (dict): Conv information.\n        head_conv (int, optional): Output channels.\n            Default: 64.\n        final_kernel (int, optional): Kernel size for the last conv layer.\n            Default: 1.\n        init_bias (float, optional): Initial bias. Default: -2.19.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str, optional): Type of bias. Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 heads,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 init_cfg=None,\n                \n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n            'behavior, init_cfg is not allowed to be set'\n        super(SeparateHead, self).__init__(init_cfg=init_cfg)\n        self.heads = heads\n\n        self.init_bias = init_bias\n        for head in self.heads:\n            classes, num_conv = self.heads[head]\n\n            conv_layers = []\n            c_in = in_channels\n            for i in range(num_conv - 1):\n                conv_layers.append(\n                    ConvModule(\n                        c_in,\n                        head_conv,\n                        kernel_size=final_kernel,\n                        stride=1,\n                        padding=final_kernel // 2,\n                        bias=bias,\n                        conv_cfg=conv_cfg,\n                        norm_cfg=norm_cfg))\n                c_in = head_conv\n\n            conv_layers.append(\n                build_conv_layer(\n                    conv_cfg,\n                    head_conv,\n                    classes,\n                    kernel_size=final_kernel,\n                    stride=1,\n                    padding=final_kernel // 2,\n                    bias=True))\n            conv_layers = nn.Sequential(*conv_layers)\n\n            self.__setattr__(head, conv_layers)\n\n            if init_cfg is None:\n                self.init_cfg = dict(type='Kaiming', layer='Conv2d')\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        super().init_weights()\n        for head in self.heads:\n            if head == 'heatmap':\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\n    \n    @force_fp32()\n    def forward(self, x):\n        \"\"\"Forward function for 
SepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            dict[str: torch.Tensor]: contains the following keys:\n\n                -reg (torch.Tensor): 2D regression value with the\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): Height value with the\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the\n                    shape of [B, 2, H, W].\n                -vel (torch.Tensor): Velocity value with the\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of\n                    [B, N, H, W].\n        \"\"\"\n\n        ret_dict = dict()\n        for head in self.heads:\n            ret_dict[head] = self.__getattr__(head)(x)\n\n        return ret_dict\n\n\n@HEADS.register_module()\nclass DCNSeparateHead(BaseModule):\n    r\"\"\"DCNSeparateHead for CenterHead.\n\n    .. code-block:: none\n            /-----> DCN for heatmap task -----> heatmap task.\n    feature\n            \\-----> DCN for regression tasks -----> regression tasks\n\n    Args:\n        in_channels (int): Input channels for conv_layer.\n        num_cls (int): Number of classes.\n        heads (dict): Conv information.\n        dcn_config (dict): Config of dcn layer.\n        head_conv (int, optional): Output channels.\n            Default: 64.\n        final_kernel (int, optional): Kernel size for the last conv\n            layer. Default: 1.\n        init_bias (float, optional): Initial bias. Default: -2.19.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str, optional): Type of bias. 
Default: 'auto'.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 in_channels,\n                 num_cls,\n                 heads,\n                 dcn_config,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 init_cfg=None,\n               \n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n            'behavior, init_cfg is not allowed to be set'\n        super(DCNSeparateHead, self).__init__(init_cfg=init_cfg)\n        if 'heatmap' in heads:\n            heads.pop('heatmap')\n        # feature adaptation with dcn\n        # use separate features for classification / regression\n        self.feature_adapt_cls = build_conv_layer(dcn_config)\n\n        self.feature_adapt_reg = build_conv_layer(dcn_config)\n\n        # heatmap prediction head\n        cls_head = [\n            ConvModule(\n                in_channels,\n                head_conv,\n                kernel_size=3,\n                padding=1,\n                conv_cfg=conv_cfg,\n                bias=bias,\n                norm_cfg=norm_cfg),\n            build_conv_layer(\n                conv_cfg,\n                head_conv,\n                num_cls,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=bias)\n        ]\n        self.cls_head = nn.Sequential(*cls_head)\n        self.init_bias = init_bias\n        # other regression target\n        self.task_head = SeparateHead(\n            in_channels,\n            heads,\n            head_conv=head_conv,\n            final_kernel=final_kernel,\n            bias=bias)\n        if init_cfg is None:\n            self.init_cfg = dict(type='Kaiming', layer='Conv2d')\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        super().init_weights()\n        self.cls_head[-1].bias.data.fill_(self.init_bias)\n\n    def forward(self, x):\n        \"\"\"Forward function for DCNSepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            dict[str: torch.Tensor]: contains the following keys:\n\n                -reg （torch.Tensor): 2D regression value with the\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): Height value with the\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the\n                    shape of [B, 2, H, W].\n                -vel (torch.Tensor): Velocity value with the\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of\n                    [B, N, H, W].\n        \"\"\"\n        center_feat = self.feature_adapt_cls(x)\n        reg_feat = self.feature_adapt_reg(x)\n\n        cls_score = self.cls_head(center_feat)\n        ret = self.task_head(reg_feat)\n        ret['heatmap'] = cls_score\n\n        return ret\n\nimport torch.utils.checkpoint as cp\n\n@HEADS.register_module()\nclass CenterHead(BaseModule):\n    \"\"\"CenterHead for CenterPoint.\n\n    Args:\n        in_channels (list[int] | int, optional): Channels of the input\n            feature map. 
Default: [128].\n        tasks (list[dict], optional): Task information including class number\n            and class names. Default: None.\n        train_cfg (dict, optional): Train-time configs. Default: None.\n        test_cfg (dict, optional): Test-time configs. Default: None.\n        bbox_coder (dict, optional): Bbox coder configs. Default: None.\n        common_heads (dict, optional): Conv information for common heads.\n            Default: dict().\n        loss_cls (dict, optional): Config of classification loss function.\n            Default: dict(type='GaussianFocalLoss', reduction='mean').\n        loss_bbox (dict, optional): Config of regression loss function.\n            Default: dict(type='L1Loss', reduction='none').\n        separate_head (dict, optional): Config of separate head. Default: dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3)\n        share_conv_channel (int, optional): Output channels for share_conv\n            layer. Default: 64.\n        num_heatmap_convs (int, optional): Number of conv layers for heatmap\n            conv layer. Default: 2.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str, optional): Type of bias. Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[128],\n                 tasks=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 bbox_coder=None,\n                 common_heads=dict(),\n                 with_cp=False,\n                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n                 loss_bbox=dict(\n                     type='L1Loss', reduction='none', loss_weight=0.25),\n                 separate_head=dict(\n                     type='SeparateHead', init_bias=-2.19, final_kernel=3),\n                 share_conv_channel=64,\n                 num_heatmap_convs=2,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 norm_bbox=True,\n                 init_cfg=None,\n                 voxel2bev=False,\n                loss_weight_per_task=1.0, # balance differet tasks, such as seg, occupancy.\n                 task_specific=True):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n            'behavior, init_cfg is not allowed to be set'\n        super(CenterHead, self).__init__(init_cfg=init_cfg)\n        self.voxel2bev = voxel2bev\n        self.loss_weight_per_task = loss_weight_per_task\n        if self.voxel2bev:\n            self.voxel2bev_layer = nn.Conv3d(in_channels, in_channels, (1, 1, 8), (1, 1, 1), (0, 0, 0))\n        num_classes = [len(t['class_names']) for t in tasks]\n        self.class_names = [t['class_names'] for t in tasks]\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.norm_bbox = norm_bbox\n        self.with_cp = with_cp\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_anchor_per_locs = [n for n in num_classes]\n        self.fp16_enabled = False\n\n        # a shared convolution\n        self.shared_conv = ConvModule(\n            in_channels,\n            share_conv_channel,\n            
kernel_size=3,\n            padding=1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=bias)\n\n        self.task_heads = nn.ModuleList()\n\n        for num_cls in num_classes:\n            heads = copy.deepcopy(common_heads)\n            heads.update(dict(heatmap=(num_cls, num_heatmap_convs)))\n            separate_head.update(\n                in_channels=share_conv_channel, heads=heads, num_cls=num_cls)\n            self.task_heads.append(builder.build_head(separate_head))\n\n        self.with_velocity = 'vel' in common_heads.keys()\n        self.task_specific = task_specific\n\n    def forward_single(self, x):\n        \"\"\"Forward function for CenterPoint.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            list[dict]: Output results for tasks.\n        \"\"\"\n        ret_dicts = []\n        if self.with_cp:\n            x = cp.checkpoint(self.shared_conv, x)\n        else:\n            x = self.shared_conv(x)\n\n        for task in self.task_heads:\n            ret_dicts.append(task(x))\n\n        return ret_dicts\n\n    def forward(self, input_dict, *args, **kwargs):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\n                features produced by FPN.\n\n        Returns:\n            tuple(list[dict]): Output results for tasks.\n        \"\"\"\n        if isinstance(input_dict, dict):\n            if input_dict['img_bev_feat'][0].dim() == 5:\n                mlvl_feats = [level.mean(-1) for level in input_dict['img_bev_feat']]\n            else:\n                mlvl_feats = input_dict['img_bev_feat']\n            if not isinstance(mlvl_feats, list):\n                mlvl_feats = [mlvl_feats]\n        elif isinstance(input_dict, list):\n            mlvl_feats = input_dict\n        return multi_apply(self.forward_single, mlvl_feats)\n\n    def _gather_feat(self, feat, ind, mask=None):\n        \"\"\"Gather feature map.\n\n        Given feature map and index, return indexed feature map.\n\n        Args:\n            feat (torch.tensor): Feature map with the shape of [B, H*W, 10].\n            ind (torch.Tensor): Index of the ground truth boxes with the\n                shape of [B, max_obj].\n            mask (torch.Tensor, optional): Mask of the feature map with the\n                shape of [B, max_obj]. Default: None.\n\n        Returns:\n            torch.Tensor: Feature map after gathering with the shape\n                of [B, max_obj, 10].\n        \"\"\"\n        dim = feat.size(2)\n        ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)\n        feat = feat.gather(1, ind)\n        if mask is not None:\n            mask = mask.unsqueeze(2).expand_as(feat)\n            feat = feat[mask]\n            feat = feat.view(-1, dim)\n        return feat\n\n    def get_targets(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate targets.\n\n        How each output is transformed:\n\n            Each nested list is transposed so that all same-index elements in\n            each sub-list (1, ..., N) become the new sub-lists.\n                [ [a0, a1, a2, ... ], [b0, b1, b2, ... ], ... ]\n                ==> [ [a0, b0, ... ], [a1, b1, ... ], [a2, b2, ... ] ]\n\n            The new transposed nested list is converted into a list of N\n            tensors generated by concatenating tensors in the new sub-lists.\n                [ tensor0, tensor1, tensor2, ... 
]\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n\n        Returns:\n            Returns:\n                tuple[list[torch.Tensor]]: Tuple of target including\n                    the following results in order.\n\n                    - list[torch.Tensor]: Heatmap scores.\n                    - list[torch.Tensor]: Ground truth boxes.\n                    - list[torch.Tensor]: Indexes indicating the\n                        position of the valid boxes.\n                    - list[torch.Tensor]: Masks indicating which\n                        boxes are valid.\n        \"\"\"\n        heatmaps, anno_boxes, inds, masks = multi_apply(\n            self.get_targets_single, gt_bboxes_3d, gt_labels_3d)\n        # Transpose heatmaps\n        heatmaps = list(map(list, zip(*heatmaps)))\n        heatmaps = [torch.stack(hms_) for hms_ in heatmaps]\n        # Transpose anno_boxes\n        anno_boxes = list(map(list, zip(*anno_boxes)))\n        anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes]\n        # Transpose inds\n        inds = list(map(list, zip(*inds)))\n        inds = [torch.stack(inds_) for inds_ in inds]\n        # Transpose inds\n        masks = list(map(list, zip(*masks)))\n        masks = [torch.stack(masks_) for masks_ in masks]\n        return heatmaps, anno_boxes, inds, masks\n\n    def get_targets_single(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.\n            gt_labels_3d (torch.Tensor): Labels of boxes.\n\n        Returns:\n            tuple[list[torch.Tensor]]: Tuple of target including\n                the following results in order.\n\n                - list[torch.Tensor]: Heatmap scores.\n                - list[torch.Tensor]: Ground truth boxes.\n                - list[torch.Tensor]: Indexes indicating the position\n                    of the valid boxes.\n                - list[torch.Tensor]: Masks indicating which boxes\n                    are valid.\n        \"\"\"\n        device = gt_labels_3d.device\n        gt_bboxes_3d = torch.cat(\n            (gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]),\n            dim=1).to(device)\n        max_objs = self.train_cfg['max_objs'] * self.train_cfg['dense_reg']\n        grid_size = torch.tensor(self.train_cfg['grid_size'])\n        pc_range = torch.tensor(self.train_cfg['point_cloud_range'])\n        voxel_size = torch.tensor(self.train_cfg['voxel_size'])\n\n        feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor']\n\n        # reorganize the gt_dict by tasks\n        task_masks = []\n        flag = 0\n        for class_name in self.class_names:\n            task_masks.append([\n                torch.where(gt_labels_3d == class_name.index(i) + flag)\n                for i in class_name\n            ])\n            flag += len(class_name)\n\n        task_boxes = []\n        task_classes = []\n        flag2 = 0\n        for idx, mask in enumerate(task_masks):\n            task_box = []\n            task_class = []\n            for m in mask:\n                task_box.append(gt_bboxes_3d[m])\n                # 0 is background for each task, so we need to add 1 here.\n                task_class.append(gt_labels_3d[m] + 1 - flag2)\n            task_boxes.append(torch.cat(task_box, axis=0).to(device))\n    
        task_classes.append(torch.cat(task_class).long().to(device))\n            flag2 += len(mask)\n        draw_gaussian = draw_heatmap_gaussian\n        heatmaps, anno_boxes, inds, masks = [], [], [], []\n\n        for idx, task_head in enumerate(self.task_heads):\n            heatmap = gt_bboxes_3d.new_zeros(\n                (len(self.class_names[idx]), feature_map_size[1],\n                 feature_map_size[0]))\n\n            if self.with_velocity:\n                anno_box = gt_bboxes_3d.new_zeros((max_objs, 10),\n                                                  dtype=torch.float32)\n            else:\n                anno_box = gt_bboxes_3d.new_zeros((max_objs, 8),\n                                                  dtype=torch.float32)\n\n            ind = gt_labels_3d.new_zeros((max_objs), dtype=torch.int64)\n            mask = gt_bboxes_3d.new_zeros((max_objs), dtype=torch.uint8)\n\n            num_objs = min(task_boxes[idx].shape[0], max_objs)\n\n            for k in range(num_objs):\n                cls_id = task_classes[idx][k] - 1\n\n                width = task_boxes[idx][k][3]\n                length = task_boxes[idx][k][4]\n                width = width / voxel_size[0] / self.train_cfg[\n                    'out_size_factor']\n                length = length / voxel_size[1] / self.train_cfg[\n                    'out_size_factor']\n\n                if width > 0 and length > 0:\n                    radius = gaussian_radius(\n                        (length, width),\n                        min_overlap=self.train_cfg['gaussian_overlap'])\n                    radius = max(self.train_cfg['min_radius'], int(radius))\n\n                    # be really careful for the coordinate system of\n                    # your box annotation.\n                    x, y, z = task_boxes[idx][k][0], task_boxes[idx][k][\n                        1], task_boxes[idx][k][2]\n\n                    coor_x = (\n                        x - pc_range[0]\n                    ) / voxel_size[0] / self.train_cfg['out_size_factor']\n                    coor_y = (\n                        y - pc_range[1]\n                    ) / voxel_size[1] / self.train_cfg['out_size_factor']\n\n                    center = torch.tensor([coor_x, coor_y],\n                                          dtype=torch.float32,\n                                          device=device)\n                    center_int = center.to(torch.int32)\n\n                    # throw out not in range objects to avoid out of array\n                    # area when creating the heatmap\n                    if not (0 <= center_int[0] < feature_map_size[0]\n                            and 0 <= center_int[1] < feature_map_size[1]):\n                        continue\n\n                    draw_gaussian(heatmap[cls_id], center_int, radius)\n\n                    new_idx = k\n                    x, y = center_int[0], center_int[1]\n\n                    assert (y * feature_map_size[0] + x <\n                            feature_map_size[0] * feature_map_size[1])\n\n                    ind[new_idx] = y * feature_map_size[0] + x\n                    mask[new_idx] = 1\n                    # TODO: support other outdoor dataset\n                    rot = task_boxes[idx][k][6]\n                    box_dim = task_boxes[idx][k][3:6]\n                    if self.norm_bbox:\n                        box_dim = box_dim.log()\n                    if self.with_velocity:\n                        vx, vy = task_boxes[idx][k][7:]\n                        anno_box[new_idx] = 
torch.cat([\n                            center - torch.tensor([x, y], device=device),\n                            z.unsqueeze(0), box_dim,\n                            torch.sin(rot).unsqueeze(0),\n                            torch.cos(rot).unsqueeze(0),\n                            vx.unsqueeze(0),\n                            vy.unsqueeze(0)\n                        ])\n                    else:\n                        anno_box[new_idx] = torch.cat([\n                            center - torch.tensor([x, y], device=device),\n                            z.unsqueeze(0), box_dim,\n                            torch.sin(rot).unsqueeze(0),\n                            torch.cos(rot).unsqueeze(0)\n                        ])\n\n            heatmaps.append(heatmap)\n            anno_boxes.append(anno_box)\n            masks.append(mask)\n            inds.append(ind)\n        return heatmaps, anno_boxes, inds, masks\n\n    def loss(self,  gt_bboxes_3d, gt_labels_3d, preds_dicts, img_metas=None, **kwargs):\n        heatmaps, anno_boxes, inds, masks = self.get_targets(\n            gt_bboxes_3d, gt_labels_3d)\n\n        return self.loss_(heatmaps, anno_boxes, inds, masks, preds_dicts, **kwargs)\n\n    @force_fp32()\n    def loss_(self, heatmaps, anno_boxes, inds, masks, preds_dicts, **kwargs):\n        \"\"\"Loss function for CenterHead.\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n            preds_dicts (dict): Output of forward function.\n\n        Returns:\n            dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.\n        \"\"\"\n\n        loss_dict = dict()\n        if not self.task_specific:\n            loss_dict['loss'] = 0\n        for task_id, preds_dict in enumerate(preds_dicts):\n            # heatmap focal loss\n            preds_dict[0]['heatmap'] = clip_sigmoid(preds_dict[0]['heatmap'])\n            num_pos = heatmaps[task_id].eq(1).float().sum().item()\n            cls_avg_factor = torch.clamp(\n                reduce_mean(heatmaps[task_id].new_tensor(num_pos)),\n                min=1).item()\n            loss_heatmap = self.loss_cls(\n                preds_dict[0]['heatmap'],\n                heatmaps[task_id],\n                avg_factor=cls_avg_factor)\n            target_box = anno_boxes[task_id]\n            # reconstruct the anno_box from multiple reg heads\n            preds_dict[0]['anno_box'] = torch.cat(\n                (\n                    preds_dict[0]['reg'],\n                    preds_dict[0]['height'],\n                    preds_dict[0]['dim'],\n                    preds_dict[0]['rot'],\n                    preds_dict[0]['vel'],\n                ),\n                dim=1,\n            )\n\n            # Regression loss for dimension, offset, height, rotation\n            num = masks[task_id].float().sum()\n            ind = inds[task_id]\n            pred = preds_dict[0]['anno_box'].permute(0, 2, 3, 1).contiguous()\n            pred = pred.view(pred.size(0), -1, pred.size(3))\n            pred = self._gather_feat(pred, ind)\n            mask = masks[task_id].unsqueeze(2).expand_as(target_box).float()\n            num = torch.clamp(\n                reduce_mean(target_box.new_tensor(num)), min=1e-4).item()\n            isnotnan = (~torch.isnan(target_box)).float()\n            mask *= isnotnan\n            code_weights = self.train_cfg['code_weights']\n            bbox_weights = mask * 
mask.new_tensor(code_weights)\n            if self.task_specific:\n                name_list = ['xy', 'z', 'whl', 'yaw', 'vel']\n                clip_index = [0, 2, 3, 6, 8, 10]\n                for reg_task_id in range(len(name_list)):\n                    pred_tmp = pred[\n                        ...,\n                        clip_index[reg_task_id]:clip_index[reg_task_id + 1]]\n                    target_box_tmp = target_box[\n                        ...,\n                        clip_index[reg_task_id]:clip_index[reg_task_id + 1]]\n                    bbox_weights_tmp = bbox_weights[\n                        ...,\n                        clip_index[reg_task_id]:clip_index[reg_task_id + 1]]\n                    loss_bbox_tmp = self.loss_bbox(\n                        pred_tmp,\n                        target_box_tmp,\n                        bbox_weights_tmp,\n                        avg_factor=(num + 1e-4))\n                    loss_dict[f'task{task_id}.loss_%s' %\n                              (name_list[reg_task_id])] = loss_bbox_tmp * self.loss_weight_per_task\n                loss_dict[f'task{task_id}.loss_heatmap'] = loss_heatmap * self.loss_weight_per_task\n            else:\n                loss_bbox = self.loss_bbox(\n                    pred, target_box, bbox_weights, avg_factor=num)\n                loss_dict['loss'] += loss_bbox * self.loss_weight_per_task\n                loss_dict['loss'] += loss_heatmap * self.loss_weight_per_task\n\n        return loss_dict\n\n    def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        rets = []\n        for task_id, preds_dict in enumerate(preds_dicts):\n            num_class_with_bg = self.num_classes[task_id]\n            batch_size = preds_dict[0]['heatmap'].shape[0]\n            batch_heatmap = preds_dict[0]['heatmap'].sigmoid()\n\n            batch_reg = preds_dict[0]['reg']\n            batch_hei = preds_dict[0]['height']\n\n            if self.norm_bbox:\n                batch_dim = torch.exp(preds_dict[0]['dim'])\n            else:\n                batch_dim = preds_dict[0]['dim']\n\n            batch_rots = preds_dict[0]['rot'][:, 0].unsqueeze(1)\n            batch_rotc = preds_dict[0]['rot'][:, 1].unsqueeze(1)\n\n            if 'vel' in preds_dict[0]:\n                batch_vel = preds_dict[0]['vel']\n            else:\n                batch_vel = None\n            temp = self.bbox_coder.decode(\n                batch_heatmap,\n                batch_rots,\n                batch_rotc,\n                batch_hei,\n                batch_dim,\n                batch_vel,\n                reg=batch_reg,\n                task_id=task_id)\n            batch_reg_preds = [box['bboxes'] for box in temp]\n            batch_cls_preds = [box['scores'] for box in temp]\n            batch_cls_labels = [box['labels'] for box in temp]\n            nms_type = self.test_cfg.get('nms_type')\n            if isinstance(nms_type, list):\n                nms_type = nms_type[task_id]\n            if nms_type == 'circle':\n                ret_task = []\n                for i in range(batch_size):\n                    boxes3d = temp[i]['bboxes']\n                    scores = temp[i]['scores']\n                    
labels = temp[i]['labels']\n                    centers = boxes3d[:, [0, 1]]\n                    boxes = torch.cat([centers, scores.view(-1, 1)], dim=1)\n                    keep = torch.tensor(\n                        circle_nms(\n                            boxes.detach().cpu().numpy(),\n                            self.test_cfg['min_radius'][task_id],\n                            post_max_size=self.test_cfg['post_max_size']),\n                        dtype=torch.long,\n                        device=boxes.device)\n\n                    boxes3d = boxes3d[keep]\n                    scores = scores[keep]\n                    labels = labels[keep]\n                    ret = dict(bboxes=boxes3d, scores=scores, labels=labels)\n                    ret_task.append(ret)\n                rets.append(ret_task)\n            else:\n                rets.append(\n                    self.get_task_detections(num_class_with_bg,\n                                             batch_cls_preds, batch_reg_preds,\n                                             batch_cls_labels, img_metas,\n                                             task_id))\n\n        # Merge branches results\n        num_samples = len(rets[0])\n\n        ret_list = []\n        for i in range(num_samples):\n            for k in rets[0][i].keys():\n                if k == 'bboxes':\n                    bboxes = torch.cat([ret[i][k] for ret in rets])\n                    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5\n                    bboxes = img_metas[i]['box_type_3d'](\n                        bboxes, self.bbox_coder.code_size)\n                elif k == 'scores':\n                    scores = torch.cat([ret[i][k] for ret in rets])\n                elif k == 'labels':\n                    flag = 0\n                    for j, num_class in enumerate(self.num_classes):\n                        rets[j][i][k] += flag\n                        flag += num_class\n                    labels = torch.cat([ret[i][k].int() for ret in rets])\n            ret_list.append(bbox3d2result(bboxes, scores, labels))\n        return ret_list\n\n    def get_task_detections(self, num_class_with_bg, batch_cls_preds,\n                            batch_reg_preds, batch_cls_labels, img_metas,\n                            task_id):\n        \"\"\"Rotate nms for each task.\n\n        Args:\n            num_class_with_bg (int): Number of classes for the current task.\n            batch_cls_preds (list[torch.Tensor]): Prediction score with the\n                shape of [N].\n            batch_reg_preds (list[torch.Tensor]): Prediction bbox with the\n                shape of [N, 9].\n            batch_cls_labels (list[torch.Tensor]): Prediction label with the\n                shape of [N].\n            img_metas (list[dict]): Meta information of each sample.\n\n        Returns:\n            list[dict[str: torch.Tensor]]: contains the following keys:\n\n                -bboxes (torch.Tensor): Prediction bboxes after nms with the\n                    shape of [N, 9].\n                -scores (torch.Tensor): Prediction scores after nms with the\n                    shape of [N].\n                -labels (torch.Tensor): Prediction labels after nms with the\n                    shape of [N].\n        \"\"\"\n        predictions_dicts = []\n        post_center_range = self.test_cfg['post_center_limit_range']\n        if len(post_center_range) > 0:\n            post_center_range = torch.tensor(\n                post_center_range,\n                
dtype=batch_reg_preds[0].dtype,\n                device=batch_reg_preds[0].device)\n\n        for i, (box_preds, cls_preds, cls_labels) in enumerate(\n                zip(batch_reg_preds, batch_cls_preds, batch_cls_labels)):\n            default_val = [1.0 for _ in range(len(self.task_heads))]\n            factor = self.test_cfg.get('nms_rescale_factor',\n                                       default_val)[task_id]\n            if isinstance(factor, list):\n                for cid in range(len(factor)):\n                    box_preds[cls_labels == cid, 3:6] = \\\n                        box_preds[cls_labels == cid, 3:6] * factor[cid]\n            else:\n                box_preds[:, 3:6] = box_preds[:, 3:6] * factor\n\n            # Apply NMS in birdeye view\n\n            # get the highest score per prediction, then apply nms\n            # to remove overlapped box.\n            if num_class_with_bg == 1:\n                top_scores = cls_preds.squeeze(-1)\n                top_labels = torch.zeros(\n                    cls_preds.shape[0],\n                    device=cls_preds.device,\n                    dtype=torch.long)\n\n            else:\n                top_labels = cls_labels.long()\n                top_scores = cls_preds.squeeze(-1)\n\n            if self.test_cfg['score_threshold'] > 0.0:\n                thresh = torch.tensor(\n                    [self.test_cfg['score_threshold']],\n                    device=cls_preds.device).type_as(cls_preds)\n                top_scores_keep = top_scores >= thresh\n                top_scores = top_scores.masked_select(top_scores_keep)\n\n            if top_scores.shape[0] != 0:\n                if self.test_cfg['score_threshold'] > 0.0:\n                    box_preds = box_preds[top_scores_keep]\n                    top_labels = top_labels[top_scores_keep]\n                boxes_for_nms = img_metas[i]['box_type_3d'](\n                    box_preds[:, :], self.bbox_coder.code_size).bev\n                # the nms in 3d detection just remove overlap boxes.\n                if isinstance(self.test_cfg['nms_thr'], list):\n                    nms_thresh = self.test_cfg['nms_thr'][task_id]\n                else:\n                    nms_thresh = self.test_cfg['nms_thr']\n                selected = nms_bev(\n                    boxes_for_nms,\n                    top_scores,\n                    thresh=nms_thresh,\n                    pre_max_size=self.test_cfg['pre_max_size'],\n                    post_max_size=self.test_cfg['post_max_size'],\n                    xyxyr2xywhr=False)\n            else:\n                selected = []\n\n            if isinstance(factor, list):\n                for cid in range(len(factor)):\n                    box_preds[top_labels == cid, 3:6] = \\\n                        box_preds[top_labels == cid, 3:6] / factor[cid]\n            else:\n                box_preds[:, 3:6] = box_preds[:, 3:6] / factor\n\n            # if selected is not None:\n            selected_boxes = box_preds[selected]\n            selected_labels = top_labels[selected]\n            selected_scores = top_scores[selected]\n\n            # finally generate predictions.\n            if selected_boxes.shape[0] != 0:\n                box_preds = selected_boxes\n                scores = selected_scores\n                label_preds = selected_labels\n                final_box_preds = box_preds\n                final_scores = scores\n                final_labels = label_preds\n                if post_center_range is not None:\n                    mask = 
(final_box_preds[:, :3] >=\n                            post_center_range[:3]).all(1)\n                    mask &= (final_box_preds[:, :3] <=\n                             post_center_range[3:]).all(1)\n                    predictions_dict = dict(\n                        bboxes=final_box_preds[mask],\n                        scores=final_scores[mask],\n                        labels=final_labels[mask])\n                else:\n                    predictions_dict = dict(\n                        bboxes=final_box_preds,\n                        scores=final_scores,\n                        labels=final_labels)\n            else:\n                dtype = batch_reg_preds[0].dtype\n                device = batch_reg_preds[0].device\n                predictions_dict = dict(\n                    bboxes=torch.zeros([0, self.bbox_coder.code_size],\n                                       dtype=dtype,\n                                       device=device),\n                    scores=torch.zeros([0], dtype=dtype, device=device),\n                    labels=torch.zeros([0],\n                                       dtype=top_labels.dtype,\n                                       device=device))\n\n            predictions_dicts.append(predictions_dict)\n        return predictions_dicts\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/centerpoint_head_single_task.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch\nfrom mmcv.cnn import ConvModule, build_conv_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn\n\nfrom mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,\n                          xywhr2xyxyr)\nfrom mmdet3d.core.post_processing import nms_bev\nfrom mmdet3d.models import builder\nfrom mmdet3d.models.utils import clip_sigmoid\nfrom mmdet.core import build_bbox_coder, multi_apply, reduce_mean\nfrom ..builder import HEADS, build_loss\n\n\n@HEADS.register_module()\nclass SeparateHead(BaseModule):\n    \"\"\"SeparateHead for CenterHead.\n\n    Args:\n        in_channels (int): Input channels for conv_layer.\n        heads (dict): Conv information.\n        head_conv (int, optional): Output channels.\n            Default: 64.\n        final_kernel (int, optional): Kernel size for the last conv layer.\n            Default: 1.\n        init_bias (float, optional): Initial bias. Default: -2.19.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str, optional): Type of bias. Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 heads,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n            'behavior, init_cfg is not allowed to be set'\n        super(SeparateHead, self).__init__(init_cfg=init_cfg)\n        self.heads = heads\n        self.init_bias = init_bias\n        for head in self.heads:\n            classes, num_conv = self.heads[head]\n\n            conv_layers = []\n            c_in = in_channels\n            for i in range(num_conv - 1):\n                conv_layers.append(\n                    ConvModule(\n                        c_in,\n                        head_conv,\n                        kernel_size=final_kernel,\n                        stride=1,\n                        padding=final_kernel // 2,\n                        bias=bias,\n                        conv_cfg=conv_cfg,\n                        norm_cfg=norm_cfg))\n                c_in = head_conv\n\n            conv_layers.append(\n                build_conv_layer(\n                    conv_cfg,\n                    head_conv,\n                    classes,\n                    kernel_size=final_kernel,\n                    stride=1,\n                    padding=final_kernel // 2,\n                    bias=True))\n            conv_layers = nn.Sequential(*conv_layers)\n\n            self.__setattr__(head, conv_layers)\n\n            if init_cfg is None:\n                self.init_cfg = dict(type='Kaiming', layer='Conv2d')\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        super().init_weights()\n        for head in self.heads:\n            if head == 'heatmap':\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\n\n    def forward(self, x):\n        \"\"\"Forward function for SepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n 
           dict[str: torch.Tensor]: contains the following keys:\n\n                -reg (torch.Tensor): 2D regression value with the\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): Height value with the\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the\n                    shape of [B, 2, H, W].\n                -vel (torch.Tensor): Velocity value with the\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of\n                    [B, N, H, W].\n        \"\"\"\n        ret_dict = dict()\n        for head in self.heads:\n            ret_dict[head] = self.__getattr__(head)(x)\n\n        return ret_dict\n\n\n@HEADS.register_module()\nclass DCNSeparateHead(BaseModule):\n    r\"\"\"DCNSeparateHead for CenterHead.\n\n    .. code-block:: none\n            /-----> DCN for heatmap task -----> heatmap task.\n    feature\n            \\-----> DCN for regression tasks -----> regression tasks\n\n    Args:\n        in_channels (int): Input channels for conv_layer.\n        num_cls (int): Number of classes.\n        heads (dict): Conv information.\n        dcn_config (dict): Config of dcn layer.\n        head_conv (int, optional): Output channels.\n            Default: 64.\n        final_kernel (int, optional): Kernel size for the last conv\n            layer. Default: 1.\n        init_bias (float, optional): Initial bias. Default: -2.19.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str, optional): Type of bias. 
Default: 'auto'.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 in_channels,\n                 num_cls,\n                 heads,\n                 dcn_config,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n            'behavior, init_cfg is not allowed to be set'\n        super(DCNSeparateHead, self).__init__(init_cfg=init_cfg)\n        if 'heatmap' in heads:\n            heads.pop('heatmap')\n        # feature adaptation with dcn\n        # use separate features for classification / regression\n        self.feature_adapt_cls = build_conv_layer(dcn_config)\n\n        self.feature_adapt_reg = build_conv_layer(dcn_config)\n\n        # heatmap prediction head\n        cls_head = [\n            ConvModule(\n                in_channels,\n                head_conv,\n                kernel_size=3,\n                padding=1,\n                conv_cfg=conv_cfg,\n                bias=bias,\n                norm_cfg=norm_cfg),\n            build_conv_layer(\n                conv_cfg,\n                head_conv,\n                num_cls,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=bias)\n        ]\n        self.cls_head = nn.Sequential(*cls_head)\n        self.init_bias = init_bias\n        # other regression target\n        self.task_head = SeparateHead(\n            in_channels,\n            heads,\n            head_conv=head_conv,\n            final_kernel=final_kernel,\n            bias=bias)\n        if init_cfg is None:\n            self.init_cfg = dict(type='Kaiming', layer='Conv2d')\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        super().init_weights()\n        self.cls_head[-1].bias.data.fill_(self.init_bias)\n\n    def forward(self, x):\n        \"\"\"Forward function for DCNSepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            dict[str: torch.Tensor]: contains the following keys:\n\n                -reg （torch.Tensor): 2D regression value with the\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): Height value with the\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the\n                    shape of [B, 2, H, W].\n                -vel (torch.Tensor): Velocity value with the\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of\n                    [B, N, H, W].\n        \"\"\"\n        center_feat = self.feature_adapt_cls(x)\n        reg_feat = self.feature_adapt_reg(x)\n\n        cls_score = self.cls_head(center_feat)\n        ret = self.task_head(reg_feat)\n        ret['heatmap'] = cls_score\n\n        return ret\n\n\n@HEADS.register_module()\nclass CenterHead(BaseModule):\n    \"\"\"CenterHead for CenterPoint.\n\n    Args:\n        in_channels (list[int] | int, optional): Channels of the input\n            feature map. 
Default: [128].\n        tasks (list[dict], optional): Task information including class number\n            and class names. Default: None.\n        train_cfg (dict, optional): Train-time configs. Default: None.\n        test_cfg (dict, optional): Test-time configs. Default: None.\n        bbox_coder (dict, optional): Bbox coder configs. Default: None.\n        common_heads (dict, optional): Conv information for common heads.\n            Default: dict().\n        loss_cls (dict, optional): Config of classification loss function.\n            Default: dict(type='GaussianFocalLoss', reduction='mean').\n        loss_bbox (dict, optional): Config of regression loss function.\n            Default: dict(type='L1Loss', reduction='none').\n        separate_head (dict, optional): Config of separate head. Default: dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3)\n        share_conv_channel (int, optional): Output channels for share_conv\n            layer. Default: 64.\n        num_heatmap_convs (int, optional): Number of conv layers for heatmap\n            conv layer. Default: 2.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str, optional): Type of bias. Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[128],\n                 tasks=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 bbox_coder=None,\n                 common_heads=dict(),\n                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n                 loss_bbox=dict(\n                     type='L1Loss', reduction='none', loss_weight=0.25),\n                 separate_head=dict(\n                     type='SeparateHead', init_bias=-2.19, final_kernel=3),\n                 share_conv_channel=64,\n                 num_heatmap_convs=2,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 norm_bbox=True,\n                 init_cfg=None,\n                 task_specific=True):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n            'behavior, init_cfg is not allowed to be set'\n        super(CenterHead, self).__init__(init_cfg=init_cfg)\n\n        num_classes = [len(t['class_names']) for t in tasks]\n        self.class_names = [t['class_names'] for t in tasks]\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.norm_bbox = norm_bbox\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_anchor_per_locs = [n for n in num_classes]\n        self.fp16_enabled = False\n\n        # a shared convolution\n        self.shared_conv = ConvModule(\n            in_channels,\n            share_conv_channel,\n            kernel_size=3,\n            padding=1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=bias)\n\n        self.task_heads = nn.ModuleList()\n\n        for num_cls in num_classes:\n            heads = copy.deepcopy(common_heads)\n            heads.update(dict(heatmap=(num_cls, num_heatmap_convs)))\n            separate_head.update(\n                in_channels=share_conv_channel, 
heads=heads, num_cls=num_cls)\n            self.task_heads.append(builder.build_head(separate_head))\n\n        self.with_velocity = 'vel' in common_heads.keys()\n        self.task_specific = task_specific\n\n    def forward_single(self, x):\n        \"\"\"Forward function for CenterPoint.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            list[dict]: Output results for tasks.\n        \"\"\"\n        ret_dicts = []\n\n        x = self.shared_conv(x)\n\n        for task in self.task_heads:\n            ret_dicts.append(task(x))\n\n        return ret_dicts\n\n    def forward(self, feats):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\n                features produced by FPN.\n\n        Returns:\n            tuple(list[dict]): Output results for tasks.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def _gather_feat(self, feat, ind, mask=None):\n        \"\"\"Gather feature map.\n\n        Given feature map and index, return indexed feature map.\n\n        Args:\n            feat (torch.tensor): Feature map with the shape of [B, H*W, 10].\n            ind (torch.Tensor): Index of the ground truth boxes with the\n                shape of [B, max_obj].\n            mask (torch.Tensor, optional): Mask of the feature map with the\n                shape of [B, max_obj]. Default: None.\n\n        Returns:\n            torch.Tensor: Feature map after gathering with the shape\n                of [B, max_obj, 10].\n        \"\"\"\n        dim = feat.size(2)\n        ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)\n        feat = feat.gather(1, ind)\n        if mask is not None:\n            mask = mask.unsqueeze(2).expand_as(feat)\n            feat = feat[mask]\n            feat = feat.view(-1, dim)\n        return feat\n\n    def get_targets(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate targets.\n\n        How each output is transformed:\n\n            Each nested list is transposed so that all same-index elements in\n            each sub-list (1, ..., N) become the new sub-lists.\n                [ [a0, a1, a2, ... ], [b0, b1, b2, ... ], ... ]\n                ==> [ [a0, b0, ... ], [a1, b1, ... ], [a2, b2, ... ] ]\n\n            The new transposed nested list is converted into a list of N\n            tensors generated by concatenating tensors in the new sub-lists.\n                [ tensor0, tensor1, tensor2, ... 
]\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n\n        Returns:\n            Returns:\n                tuple[list[torch.Tensor]]: Tuple of target including\n                    the following results in order.\n\n                    - list[torch.Tensor]: Heatmap scores.\n                    - list[torch.Tensor]: Ground truth boxes.\n                    - list[torch.Tensor]: Indexes indicating the\n                        position of the valid boxes.\n                    - list[torch.Tensor]: Masks indicating which\n                        boxes are valid.\n        \"\"\"\n        heatmaps, anno_boxes, inds, masks = multi_apply(\n            self.get_targets_single, gt_bboxes_3d, gt_labels_3d)\n        # Transpose heatmaps\n        heatmaps = list(map(list, zip(*heatmaps)))\n        heatmaps = [torch.stack(hms_) for hms_ in heatmaps]\n        # Transpose anno_boxes\n        anno_boxes = list(map(list, zip(*anno_boxes)))\n        anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes]\n        # Transpose inds\n        inds = list(map(list, zip(*inds)))\n        inds = [torch.stack(inds_) for inds_ in inds]\n        # Transpose inds\n        masks = list(map(list, zip(*masks)))\n        masks = [torch.stack(masks_) for masks_ in masks]\n        return heatmaps, anno_boxes, inds, masks\n\n    def get_targets_single(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.\n            gt_labels_3d (torch.Tensor): Labels of boxes.\n\n        Returns:\n            tuple[list[torch.Tensor]]: Tuple of target including\n                the following results in order.\n\n                - list[torch.Tensor]: Heatmap scores.\n                - list[torch.Tensor]: Ground truth boxes.\n                - list[torch.Tensor]: Indexes indicating the position\n                    of the valid boxes.\n                - list[torch.Tensor]: Masks indicating which boxes\n                    are valid.\n        \"\"\"\n        device = gt_labels_3d.device\n        gt_bboxes_3d = torch.cat(\n            (gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]),\n            dim=1).to(device)\n        max_objs = self.train_cfg['max_objs'] * self.train_cfg['dense_reg']\n        grid_size = torch.tensor(self.train_cfg['grid_size'])\n        pc_range = torch.tensor(self.train_cfg['point_cloud_range'])\n        voxel_size = torch.tensor(self.train_cfg['voxel_size'])\n\n        feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor']\n\n        # reorganize the gt_dict by tasks\n        task_masks = []\n        flag = 0\n        for class_name in self.class_names:\n            task_masks.append([\n                torch.where(gt_labels_3d == class_name.index(i) + flag)\n                for i in class_name\n            ])\n            flag += len(class_name)\n\n        task_boxes = []\n        task_classes = []\n        flag2 = 0\n        for idx, mask in enumerate(task_masks):\n            task_box = []\n            task_class = []\n            for m in mask:\n                task_box.append(gt_bboxes_3d[m])\n                # 0 is background for each task, so we need to add 1 here.\n                task_class.append(gt_labels_3d[m] + 1 - flag2)\n            task_boxes.append(torch.cat(task_box, axis=0).to(device))\n    
        task_classes.append(torch.cat(task_class).long().to(device))\n            flag2 += len(mask)\n        draw_gaussian = draw_heatmap_gaussian\n        heatmaps, anno_boxes, inds, masks = [], [], [], []\n\n        for idx, task_head in enumerate(self.task_heads):\n            heatmap = gt_bboxes_3d.new_zeros(\n                (len(self.class_names[idx]), feature_map_size[1],\n                 feature_map_size[0]))\n\n            if self.with_velocity:\n                anno_box = gt_bboxes_3d.new_zeros((max_objs, 10),\n                                                  dtype=torch.float32)\n            else:\n                anno_box = gt_bboxes_3d.new_zeros((max_objs, 8),\n                                                  dtype=torch.float32)\n\n            ind = gt_labels_3d.new_zeros((max_objs), dtype=torch.int64)\n            mask = gt_bboxes_3d.new_zeros((max_objs), dtype=torch.uint8)\n\n            num_objs = min(task_boxes[idx].shape[0], max_objs)\n\n            for k in range(num_objs):\n                cls_id = task_classes[idx][k] - 1\n\n                width = task_boxes[idx][k][3]\n                length = task_boxes[idx][k][4]\n                width = width / voxel_size[0] / self.train_cfg[\n                    'out_size_factor']\n                length = length / voxel_size[1] / self.train_cfg[\n                    'out_size_factor']\n\n                if width > 0 and length > 0:\n                    radius = gaussian_radius(\n                        (length, width),\n                        min_overlap=self.train_cfg['gaussian_overlap'])\n                    radius = max(self.train_cfg['min_radius'], int(radius))\n\n                    # be really careful for the coordinate system of\n                    # your box annotation.\n                    x, y, z = task_boxes[idx][k][0], task_boxes[idx][k][\n                        1], task_boxes[idx][k][2]\n\n                    coor_x = (\n                        x - pc_range[0]\n                    ) / voxel_size[0] / self.train_cfg['out_size_factor']\n                    coor_y = (\n                        y - pc_range[1]\n                    ) / voxel_size[1] / self.train_cfg['out_size_factor']\n\n                    center = torch.tensor([coor_x, coor_y],\n                                          dtype=torch.float32,\n                                          device=device)\n                    center_int = center.to(torch.int32)\n\n                    # throw out not in range objects to avoid out of array\n                    # area when creating the heatmap\n                    if not (0 <= center_int[0] < feature_map_size[0]\n                            and 0 <= center_int[1] < feature_map_size[1]):\n                        continue\n\n                    draw_gaussian(heatmap[cls_id], center_int, radius)\n\n                    new_idx = k\n                    x, y = center_int[0], center_int[1]\n\n                    assert (y * feature_map_size[0] + x <\n                            feature_map_size[0] * feature_map_size[1])\n\n                    ind[new_idx] = y * feature_map_size[0] + x\n                    mask[new_idx] = 1\n                    # TODO: support other outdoor dataset\n                    rot = task_boxes[idx][k][6]\n                    box_dim = task_boxes[idx][k][3:6]\n                    if self.norm_bbox:\n                        box_dim = box_dim.log()\n                    if self.with_velocity:\n                        vx, vy = task_boxes[idx][k][7:]\n                        anno_box[new_idx] = 
torch.cat([\n                            center - torch.tensor([x, y], device=device),\n                            z.unsqueeze(0), box_dim,\n                            torch.sin(rot).unsqueeze(0),\n                            torch.cos(rot).unsqueeze(0),\n                            vx.unsqueeze(0),\n                            vy.unsqueeze(0)\n                        ])\n                    else:\n                        anno_box[new_idx] = torch.cat([\n                            center - torch.tensor([x, y], device=device),\n                            z.unsqueeze(0), box_dim,\n                            torch.sin(rot).unsqueeze(0),\n                            torch.cos(rot).unsqueeze(0)\n                        ])\n\n            heatmaps.append(heatmap)\n            anno_boxes.append(anno_box)\n            masks.append(mask)\n            inds.append(ind)\n        return heatmaps, anno_boxes, inds, masks\n\n    def loss(self, gt_bboxes_3d, gt_labels_3d, preds_dicts, **kwargs):\n        \"\"\"Loss function for CenterHead.\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n            preds_dicts (dict): Output of forward function.\n\n        Returns:\n            dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.\n        \"\"\"\n        heatmaps, anno_boxes, inds, masks = self.get_targets(\n            gt_bboxes_3d, gt_labels_3d)\n        loss_dict = dict()\n        if not self.task_specific:\n            loss_dict['loss'] = 0\n        for task_id, preds_dict in enumerate(preds_dicts):\n            # heatmap focal loss\n            preds_dict[0]['heatmap'] = clip_sigmoid(preds_dict[0]['heatmap'])\n            num_pos = heatmaps[task_id].eq(1).float().sum().item()\n            cls_avg_factor = torch.clamp(\n                reduce_mean(heatmaps[task_id].new_tensor(num_pos)),\n                min=1).item()\n            loss_heatmap = self.loss_cls(\n                preds_dict[0]['heatmap'],\n                heatmaps[task_id],\n                avg_factor=cls_avg_factor)\n            target_box = anno_boxes[task_id]\n            # reconstruct the anno_box from multiple reg heads\n            preds_dict[0]['anno_box'] = torch.cat(\n                (\n                    preds_dict[0]['reg'],\n                    preds_dict[0]['height'],\n                    preds_dict[0]['dim'],\n                    preds_dict[0]['rot'],\n                    preds_dict[0]['vel'],\n                ),\n                dim=1,\n            )\n\n            # Regression loss for dimension, offset, height, rotation\n            num = masks[task_id].float().sum()\n            ind = inds[task_id]\n            pred = preds_dict[0]['anno_box'].permute(0, 2, 3, 1).contiguous()\n            pred = pred.view(pred.size(0), -1, pred.size(3))\n            pred = self._gather_feat(pred, ind)\n            mask = masks[task_id].unsqueeze(2).expand_as(target_box).float()\n            num = torch.clamp(\n                reduce_mean(target_box.new_tensor(num)), min=1e-4).item()\n            isnotnan = (~torch.isnan(target_box)).float()\n            mask *= isnotnan\n            code_weights = self.train_cfg['code_weights']\n            bbox_weights = mask * mask.new_tensor(code_weights)\n            if self.task_specific:\n                name_list = ['xy', 'z', 'whl', 'yaw', 'vel']\n                clip_index = [0, 2, 3, 6, 8, 10]\n                for reg_task_id in 
range(len(name_list)):\n                    pred_tmp = pred[\n                        ...,\n                        clip_index[reg_task_id]:clip_index[reg_task_id + 1]]\n                    target_box_tmp = target_box[\n                        ...,\n                        clip_index[reg_task_id]:clip_index[reg_task_id + 1]]\n                    bbox_weights_tmp = bbox_weights[\n                        ...,\n                        clip_index[reg_task_id]:clip_index[reg_task_id + 1]]\n                    loss_bbox_tmp = self.loss_bbox(\n                        pred_tmp,\n                        target_box_tmp,\n                        bbox_weights_tmp,\n                        avg_factor=(num + 1e-4))\n                    loss_dict[f'task{task_id}.loss_%s' %\n                              (name_list[reg_task_id])] = loss_bbox_tmp\n                loss_dict[f'task{task_id}.loss_heatmap'] = loss_heatmap\n            else:\n                loss_bbox = self.loss_bbox(\n                    pred, target_box, bbox_weights, avg_factor=num)\n                loss_dict['loss'] += loss_bbox\n                loss_dict['loss'] += loss_heatmap\n\n        return loss_dict\n\n    def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        rets = []\n        for task_id, preds_dict in enumerate(preds_dicts):\n            batch_size = preds_dict[0]['heatmap'].shape[0]\n            batch_heatmap = preds_dict[0]['heatmap'].sigmoid()\n\n            batch_reg = preds_dict[0]['reg']\n            batch_hei = preds_dict[0]['height']\n\n            if self.norm_bbox:\n                batch_dim = torch.exp(preds_dict[0]['dim'])\n            else:\n                batch_dim = preds_dict[0]['dim']\n\n            batch_rots = preds_dict[0]['rot'][:, 0].unsqueeze(1)\n            batch_rotc = preds_dict[0]['rot'][:, 1].unsqueeze(1)\n\n            if 'vel' in preds_dict[0]:\n                batch_vel = preds_dict[0]['vel']\n            else:\n                batch_vel = None\n            temp = self.bbox_coder.decode(\n                batch_heatmap,\n                batch_rots,\n                batch_rotc,\n                batch_hei,\n                batch_dim,\n                batch_vel,\n                reg=batch_reg,\n                task_id=task_id)\n            batch_reg_preds = [box['bboxes'] for box in temp]\n            batch_cls_preds = [box['scores'] for box in temp]\n            batch_cls_labels = [box['labels'] for box in temp]\n            nms_type = self.test_cfg.get('nms_type')\n            if isinstance(nms_type, list):\n                nms_type = nms_type[task_id]\n            if nms_type == 'circle':\n                ret_task = []\n                for i in range(batch_size):\n                    boxes3d = temp[i]['bboxes']\n                    scores = temp[i]['scores']\n                    labels = temp[i]['labels']\n                    centers = boxes3d[:, [0, 1]]\n                    boxes = torch.cat([centers, scores.view(-1, 1)], dim=1)\n                    keep = torch.tensor(\n                        circle_nms(\n                            boxes.detach().cpu().numpy(),\n                            self.test_cfg['min_radius'][task_id],\n                            
post_max_size=self.test_cfg['post_max_size']),\n                        dtype=torch.long,\n                        device=boxes.device)\n\n                    boxes3d = boxes3d[keep]\n                    scores = scores[keep]\n                    labels = labels[keep]\n                    ret = dict(bboxes=boxes3d, scores=scores, labels=labels)\n                    ret_task.append(ret)\n                rets.append(ret_task)\n            else:\n                rets.append(\n                    self.get_task_detections(batch_cls_preds, batch_reg_preds,\n                                             batch_cls_labels, img_metas,\n                                             task_id))\n\n        # Merge branches results\n        num_samples = len(rets[0])\n\n        ret_list = []\n        for i in range(num_samples):\n            for k in rets[0][i].keys():\n                if k == 'bboxes':\n                    bboxes = torch.cat([ret[i][k] for ret in rets])\n                    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5\n                    bboxes = img_metas[i]['box_type_3d'](\n                        bboxes, self.bbox_coder.code_size)\n                elif k == 'scores':\n                    scores = torch.cat([ret[i][k] for ret in rets])\n                elif k == 'labels':\n                    flag = 0\n                    for j, num_class in enumerate(self.num_classes):\n                        rets[j][i][k] += flag\n                        flag += num_class\n                    labels = torch.cat([ret[i][k].int() for ret in rets])\n            ret_list.append([bboxes, scores, labels])\n        return ret_list\n\n    def get_task_detections(self, batch_cls_preds,\n                            batch_reg_preds, batch_cls_labels, img_metas,\n                            task_id):\n        \"\"\"Rotate nms for each task.\n\n        Args:\n            batch_cls_preds (list[torch.Tensor]): Prediction score with the\n                shape of [N].\n            batch_reg_preds (list[torch.Tensor]): Prediction bbox with the\n                shape of [N, 9].\n            batch_cls_labels (list[torch.Tensor]): Prediction label with the\n                shape of [N].\n            img_metas (list[dict]): Meta information of each sample.\n\n        Returns:\n            list[dict[str: torch.Tensor]]: contains the following keys:\n\n                -bboxes (torch.Tensor): Prediction bboxes after nms with the\n                    shape of [N, 9].\n                -scores (torch.Tensor): Prediction scores after nms with the\n                    shape of [N].\n                -labels (torch.Tensor): Prediction labels after nms with the\n                    shape of [N].\n        \"\"\"\n        predictions_dicts = []\n        for i, (box_preds, cls_preds, cls_labels) in enumerate(\n                zip(batch_reg_preds, batch_cls_preds, batch_cls_labels)):\n            default_val = [1.0 for _ in range(len(self.task_heads))]\n            factor = self.test_cfg.get('nms_rescale_factor',\n                                       default_val)[task_id]\n            if isinstance(factor, list):\n                for cid in range(len(factor)):\n                    box_preds[cls_labels == cid, 3:6] = \\\n                        box_preds[cls_labels == cid, 3:6] * factor[cid]\n            else:\n                box_preds[:, 3:6] = box_preds[:, 3:6] * factor\n\n            # Apply NMS in birdeye view\n            top_labels = cls_labels.long()\n            top_scores = cls_preds.squeeze(-1) if cls_preds.shape[0]>1 
\\\n                else cls_preds\n\n            if top_scores.shape[0] != 0:\n                boxes_for_nms = img_metas[i]['box_type_3d'](\n                    box_preds[:, :], self.bbox_coder.code_size).bev\n                # the nms in 3d detection just remove overlap boxes.\n                if isinstance(self.test_cfg['nms_thr'], list):\n                    nms_thresh = self.test_cfg['nms_thr'][task_id]\n                else:\n                    nms_thresh = self.test_cfg['nms_thr']\n                selected = nms_bev(\n                    boxes_for_nms,\n                    top_scores,\n                    thresh=nms_thresh,\n                    pre_max_size=self.test_cfg['pre_max_size'],\n                    post_max_size=self.test_cfg['post_max_size'],\n                    xyxyr2xywhr=False)\n            else:\n                selected = []\n\n            if isinstance(factor, list):\n                for cid in range(len(factor)):\n                    box_preds[top_labels == cid, 3:6] = \\\n                        box_preds[top_labels == cid, 3:6] / factor[cid]\n            else:\n                box_preds[:, 3:6] = box_preds[:, 3:6] / factor\n\n            # if selected is not None:\n            selected_boxes = box_preds[selected]\n            selected_labels = top_labels[selected]\n            selected_scores = top_scores[selected]\n\n            # finally generate predictions.\n            if selected_boxes.shape[0] != 0:\n                predictions_dict = dict(\n                    bboxes=selected_boxes,\n                    scores=selected_scores,\n                    labels=selected_labels)\n            else:\n                dtype = batch_reg_preds[0].dtype\n                device = batch_reg_preds[0].device\n                predictions_dict = dict(\n                    bboxes=torch.zeros([0, self.bbox_coder.code_size],\n                                       dtype=dtype,\n                                       device=device),\n                    scores=torch.zeros([0], dtype=dtype, device=device),\n                    labels=torch.zeros([0],\n                                       dtype=top_labels.dtype,\n                                       device=device))\n\n            predictions_dicts.append(predictions_dict)\n        return predictions_dicts\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/fcaf3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Adapted from https://github.com/SamsungLabs/fcaf3d/blob/master/mmdet3d/models/dense_heads/fcaf3d_neck_with_head.py # noqa\ntry:\n    import MinkowskiEngine as ME\nexcept ImportError:\n    # Please follow getting_started.md to install MinkowskiEngine.\n    pass\n\nimport torch\nfrom mmcv.cnn import Scale, bias_init_with_prob\nfrom mmcv.ops import nms3d, nms3d_normal\nfrom mmcv.runner.base_module import BaseModule\nfrom torch import nn\n\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\nfrom mmdet3d.models import HEADS, build_loss\nfrom mmdet.core import reduce_mean\n\n\n@HEADS.register_module()\nclass FCAF3DHead(BaseModule):\n    r\"\"\"Bbox head of `FCAF3D <https://arxiv.org/abs/2112.00322>`_.\n    Actually here we store both the sparse 3D FPN and a head. The neck and\n    the head can not be simply separated as pruning score on the i-th level\n    of FPN requires classification scores from i+1-th level of the head.\n\n    Args:\n        n_classes (int): Number of classes.\n        in_channels (tuple[int]): Number of channels in input tensors.\n        out_channels (int): Number of channels in the neck output tensors.\n        n_reg_outs (int): Number of regression layer channels.\n        voxel_size (float): Voxel size in meters.\n        pts_prune_threshold (int): Pruning threshold on each feature level.\n        pts_assign_threshold (int): Box to location assigner parameter.\n            Assigner selects the maximum feature level with more locations\n            inside the box than pts_assign_threshold.\n        pts_center_threshold (int): Box to location assigner parameter.\n            After feature level for the box is determined, assigner selects\n            pts_center_threshold locations closest to the box center.\n        center_loss (dict, optional): Config of centerness loss.\n        bbox_loss (dict, optional): Config of bbox loss.\n        cls_loss (dict, optional): Config of classification loss.\n        train_cfg (dict, optional): Config for train stage. Defaults to None.\n        test_cfg (dict, optional): Config for test stage. 
Defaults to None.\n        init_cfg (dict, optional): Config for weight initialization.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 n_classes,\n                 in_channels,\n                 out_channels,\n                 n_reg_outs,\n                 voxel_size,\n                 pts_prune_threshold,\n                 pts_assign_threshold,\n                 pts_center_threshold,\n                 center_loss=dict(type='CrossEntropyLoss', use_sigmoid=True),\n                 bbox_loss=dict(type='AxisAlignedIoULoss'),\n                 cls_loss=dict(type='FocalLoss'),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None):\n        super(FCAF3DHead, self).__init__(init_cfg)\n        self.voxel_size = voxel_size\n        self.pts_prune_threshold = pts_prune_threshold\n        self.pts_assign_threshold = pts_assign_threshold\n        self.pts_center_threshold = pts_center_threshold\n        self.center_loss = build_loss(center_loss)\n        self.bbox_loss = build_loss(bbox_loss)\n        self.cls_loss = build_loss(cls_loss)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self._init_layers(in_channels, out_channels, n_reg_outs, n_classes)\n\n    @staticmethod\n    def _make_block(in_channels, out_channels):\n        \"\"\"Construct Conv-Norm-Act block.\n\n        Args:\n            in_channels (int): Number of input channels.\n            out_channels (int): Number of output channels.\n\n        Returns:\n            torch.nn.Module: With corresponding layers.\n        \"\"\"\n        return nn.Sequential(\n            ME.MinkowskiConvolution(\n                in_channels, out_channels, kernel_size=3, dimension=3),\n            ME.MinkowskiBatchNorm(out_channels), ME.MinkowskiELU())\n\n    @staticmethod\n    def _make_up_block(in_channels, out_channels):\n        \"\"\"Construct DeConv-Norm-Act-Conv-Norm-Act block.\n\n        Args:\n            in_channels (int): Number of input channels.\n            out_channels (int): Number of output channels.\n\n        Returns:\n            torch.nn.Module: With corresponding layers.\n        \"\"\"\n        return nn.Sequential(\n            ME.MinkowskiGenerativeConvolutionTranspose(\n                in_channels,\n                out_channels,\n                kernel_size=2,\n                stride=2,\n                dimension=3), ME.MinkowskiBatchNorm(out_channels),\n            ME.MinkowskiELU(),\n            ME.MinkowskiConvolution(\n                out_channels, out_channels, kernel_size=3, dimension=3),\n            ME.MinkowskiBatchNorm(out_channels), ME.MinkowskiELU())\n\n    def _init_layers(self, in_channels, out_channels, n_reg_outs, n_classes):\n        \"\"\"Initialize layers.\n\n        Args:\n            in_channels (tuple[int]): Number of channels in input tensors.\n            out_channels (int): Number of channels in the neck output tensors.\n            n_reg_outs (int): Number of regression layer channels.\n            n_classes (int): Number of classes.\n        \"\"\"\n        # neck layers\n        self.pruning = ME.MinkowskiPruning()\n        for i in range(len(in_channels)):\n            if i > 0:\n                self.__setattr__(\n                    f'up_block_{i}',\n                    self._make_up_block(in_channels[i], in_channels[i - 1]))\n            self.__setattr__(f'out_block_{i}',\n                             self._make_block(in_channels[i], out_channels))\n\n        # head layers\n        
self.conv_center = ME.MinkowskiConvolution(\n            out_channels, 1, kernel_size=1, dimension=3)\n        self.conv_reg = ME.MinkowskiConvolution(\n            out_channels, n_reg_outs, kernel_size=1, dimension=3)\n        self.conv_cls = ME.MinkowskiConvolution(\n            out_channels, n_classes, kernel_size=1, bias=True, dimension=3)\n        self.scales = nn.ModuleList(\n            [Scale(1.) for _ in range(len(in_channels))])\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        nn.init.normal_(self.conv_center.kernel, std=.01)\n        nn.init.normal_(self.conv_reg.kernel, std=.01)\n        nn.init.normal_(self.conv_cls.kernel, std=.01)\n        nn.init.constant_(self.conv_cls.bias, bias_init_with_prob(.01))\n\n    def forward(self, x):\n        \"\"\"Forward pass.\n\n        Args:\n            x (list[Tensor]): Features from the backbone.\n\n        Returns:\n            list[list[Tensor]]: Predictions of the head.\n        \"\"\"\n        center_preds, bbox_preds, cls_preds, points = [], [], [], []\n        inputs = x\n        x = inputs[-1]\n        prune_score = None\n        for i in range(len(inputs) - 1, -1, -1):\n            if i < len(inputs) - 1:\n                x = self.__getattr__(f'up_block_{i + 1}')(x)\n                x = inputs[i] + x\n                x = self._prune(x, prune_score)\n\n            out = self.__getattr__(f'out_block_{i}')(x)\n            center_pred, bbox_pred, cls_pred, point, prune_score = \\\n                self._forward_single(out, self.scales[i])\n            center_preds.append(center_pred)\n            bbox_preds.append(bbox_pred)\n            cls_preds.append(cls_pred)\n            points.append(point)\n        return center_preds[::-1], bbox_preds[::-1], cls_preds[::-1], \\\n            points[::-1]\n\n    def forward_train(self, x, gt_bboxes, gt_labels, input_metas):\n        \"\"\"Forward pass of the train stage.\n\n        Args:\n            x (list[SparseTensor]): Features from the backbone.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels(list[torch.Tensor]): Labels of each sample.\n            input_metas (list[dict]): Contains scene meta info for each sample.\n\n        Returns:\n            dict: Centerness, bbox and classification loss values.\n        \"\"\"\n        center_preds, bbox_preds, cls_preds, points = self(x)\n        return self._loss(center_preds, bbox_preds, cls_preds, points,\n                          gt_bboxes, gt_labels, input_metas)\n\n    def forward_test(self, x, input_metas):\n        \"\"\"Forward pass of the test stage.\n\n        Args:\n            x (list[SparseTensor]): Features from the backbone.\n            input_metas (list[dict]): Contains scene meta info for each sample.\n\n        Returns:\n            list[list[Tensor]]: bboxes, scores and labels for each sample.\n        \"\"\"\n        center_preds, bbox_preds, cls_preds, points = self(x)\n        return self._get_bboxes(center_preds, bbox_preds, cls_preds, points,\n                                input_metas)\n\n    def _prune(self, x, scores):\n        \"\"\"Prunes the tensor by score thresholding.\n\n        Args:\n            x (SparseTensor): Tensor to be pruned.\n            scores (SparseTensor): Scores for thresholding.\n\n        Returns:\n            SparseTensor: Pruned tensor.\n        \"\"\"\n        with torch.no_grad():\n            coordinates = x.C.float()\n            interpolated_scores = 
scores.features_at_coordinates(coordinates)\n            prune_mask = interpolated_scores.new_zeros(\n                (len(interpolated_scores)), dtype=torch.bool)\n            for permutation in x.decomposition_permutations:\n                score = interpolated_scores[permutation]\n                mask = score.new_zeros((len(score)), dtype=torch.bool)\n                topk = min(len(score), self.pts_prune_threshold)\n                ids = torch.topk(score.squeeze(1), topk, sorted=False).indices\n                mask[ids] = True\n                prune_mask[permutation[mask]] = True\n        x = self.pruning(x, prune_mask)\n        return x\n\n    def _forward_single(self, x, scale):\n        \"\"\"Forward pass per level.\n\n        Args:\n            x (SparseTensor): Per level neck output tensor.\n            scale (mmcv.cnn.Scale): Per level multiplication weight.\n\n        Returns:\n            tuple[Tensor]: Per level head predictions.\n        \"\"\"\n        center_pred = self.conv_center(x).features\n        scores = self.conv_cls(x)\n        cls_pred = scores.features\n        prune_scores = ME.SparseTensor(\n            scores.features.max(dim=1, keepdim=True).values,\n            coordinate_map_key=scores.coordinate_map_key,\n            coordinate_manager=scores.coordinate_manager)\n        reg_final = self.conv_reg(x).features\n        reg_distance = torch.exp(scale(reg_final[:, :6]))\n        reg_angle = reg_final[:, 6:]\n        bbox_pred = torch.cat((reg_distance, reg_angle), dim=1)\n\n        center_preds, bbox_preds, cls_preds, points = [], [], [], []\n        for permutation in x.decomposition_permutations:\n            center_preds.append(center_pred[permutation])\n            bbox_preds.append(bbox_pred[permutation])\n            cls_preds.append(cls_pred[permutation])\n\n        points = x.decomposed_coordinates\n        for i in range(len(points)):\n            points[i] = points[i] * self.voxel_size\n\n        return center_preds, bbox_preds, cls_preds, points, prune_scores\n\n    def _loss_single(self, center_preds, bbox_preds, cls_preds, points,\n                     gt_bboxes, gt_labels, input_meta):\n        \"\"\"Per scene loss function.\n\n        Args:\n            center_preds (list[Tensor]): Centerness predictions for all levels.\n            bbox_preds (list[Tensor]): Bbox predictions for all levels.\n            cls_preds (list[Tensor]): Classification predictions for all\n                levels.\n            points (list[Tensor]): Final location coordinates for all levels.\n            gt_bboxes (BaseInstance3DBoxes): Ground truth boxes.\n            gt_labels (Tensor): Ground truth labels.\n            input_meta (dict): Scene meta info.\n\n        Returns:\n            tuple[Tensor]: Centerness, bbox, and classification loss values.\n        \"\"\"\n        center_targets, bbox_targets, cls_targets = self._get_targets(\n            points, gt_bboxes, gt_labels)\n\n        center_preds = torch.cat(center_preds)\n        bbox_preds = torch.cat(bbox_preds)\n        cls_preds = torch.cat(cls_preds)\n        points = torch.cat(points)\n\n        # cls loss\n        pos_inds = torch.nonzero(cls_targets >= 0).squeeze(1)\n        n_pos = points.new_tensor(len(pos_inds))\n        n_pos = max(reduce_mean(n_pos), 1.)\n        cls_loss = self.cls_loss(cls_preds, cls_targets, avg_factor=n_pos)\n\n        # bbox and centerness losses\n        pos_center_preds = center_preds[pos_inds]\n        pos_bbox_preds = bbox_preds[pos_inds]\n        pos_center_targets = 
center_targets[pos_inds].unsqueeze(1)\n        pos_bbox_targets = bbox_targets[pos_inds]\n        # reduce_mean is outside if / else block to prevent deadlock\n        center_denorm = max(\n            reduce_mean(pos_center_targets.sum().detach()), 1e-6)\n        if len(pos_inds) > 0:\n            pos_points = points[pos_inds]\n            center_loss = self.center_loss(\n                pos_center_preds, pos_center_targets, avg_factor=n_pos)\n            bbox_loss = self.bbox_loss(\n                self._bbox_to_loss(\n                    self._bbox_pred_to_bbox(pos_points, pos_bbox_preds)),\n                self._bbox_to_loss(pos_bbox_targets),\n                weight=pos_center_targets.squeeze(1),\n                avg_factor=center_denorm)\n        else:\n            center_loss = pos_center_preds.sum()\n            bbox_loss = pos_bbox_preds.sum()\n        return center_loss, bbox_loss, cls_loss\n\n    def _loss(self, center_preds, bbox_preds, cls_preds, points, gt_bboxes,\n              gt_labels, input_metas):\n        \"\"\"Per scene loss function.\n\n        Args:\n            center_preds (list[list[Tensor]]): Centerness predictions for\n                all scenes.\n            bbox_preds (list[list[Tensor]]): Bbox predictions for all scenes.\n            cls_preds (list[list[Tensor]]): Classification predictions for all\n                scenes.\n            points (list[list[Tensor]]): Final location coordinates for all\n                scenes.\n            gt_bboxes (list[BaseInstance3DBoxes]): Ground truth boxes for all\n                scenes.\n            gt_labels (list[Tensor]): Ground truth labels for all scenes.\n            input_metas (list[dict]): Meta infos for all scenes.\n\n        Returns:\n            dict: Centerness, bbox, and classification loss values.\n        \"\"\"\n        center_losses, bbox_losses, cls_losses = [], [], []\n        for i in range(len(input_metas)):\n            center_loss, bbox_loss, cls_loss = self._loss_single(\n                center_preds=[x[i] for x in center_preds],\n                bbox_preds=[x[i] for x in bbox_preds],\n                cls_preds=[x[i] for x in cls_preds],\n                points=[x[i] for x in points],\n                input_meta=input_metas[i],\n                gt_bboxes=gt_bboxes[i],\n                gt_labels=gt_labels[i])\n            center_losses.append(center_loss)\n            bbox_losses.append(bbox_loss)\n            cls_losses.append(cls_loss)\n        return dict(\n            center_loss=torch.mean(torch.stack(center_losses)),\n            bbox_loss=torch.mean(torch.stack(bbox_losses)),\n            cls_loss=torch.mean(torch.stack(cls_losses)))\n\n    def _get_bboxes_single(self, center_preds, bbox_preds, cls_preds, points,\n                           input_meta):\n        \"\"\"Generate boxes for a single scene.\n\n        Args:\n            center_preds (list[Tensor]): Centerness predictions for all levels.\n            bbox_preds (list[Tensor]): Bbox predictions for all levels.\n            cls_preds (list[Tensor]): Classification predictions for all\n                levels.\n            points (list[Tensor]): Final location coordinates for all levels.\n            input_meta (dict): Scene meta info.\n\n        Returns:\n            tuple[Tensor]: Predicted bounding boxes, scores and labels.\n        \"\"\"\n        mlvl_bboxes, mlvl_scores = [], []\n        for center_pred, bbox_pred, cls_pred, point in zip(\n                center_preds, bbox_preds, cls_preds, points):\n            scores = 
cls_pred.sigmoid() * center_pred.sigmoid()\n            max_scores, _ = scores.max(dim=1)\n\n            if len(scores) > self.test_cfg.nms_pre > 0:\n                _, ids = max_scores.topk(self.test_cfg.nms_pre)\n                bbox_pred = bbox_pred[ids]\n                scores = scores[ids]\n                point = point[ids]\n\n            bboxes = self._bbox_pred_to_bbox(point, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n\n        bboxes = torch.cat(mlvl_bboxes)\n        scores = torch.cat(mlvl_scores)\n        bboxes, scores, labels = self._single_scene_multiclass_nms(\n            bboxes, scores, input_meta)\n        return bboxes, scores, labels\n\n    def _get_bboxes(self, center_preds, bbox_preds, cls_preds, points,\n                    input_metas):\n        \"\"\"Generate boxes for all scenes.\n\n        Args:\n            center_preds (list[list[Tensor]]): Centerness predictions for\n                all scenes.\n            bbox_preds (list[list[Tensor]]): Bbox predictions for all scenes.\n            cls_preds (list[list[Tensor]]): Classification predictions for all\n                scenes.\n            points (list[list[Tensor]]): Final location coordinates for all\n                scenes.\n            input_metas (list[dict]): Meta infos for all scenes.\n\n        Returns:\n            list[tuple[Tensor]]: Predicted bboxes, scores, and labels for\n                all scenes.\n        \"\"\"\n        results = []\n        for i in range(len(input_metas)):\n            result = self._get_bboxes_single(\n                center_preds=[x[i] for x in center_preds],\n                bbox_preds=[x[i] for x in bbox_preds],\n                cls_preds=[x[i] for x in cls_preds],\n                points=[x[i] for x in points],\n                input_meta=input_metas[i])\n            results.append(result)\n        return results\n\n    @staticmethod\n    def _bbox_to_loss(bbox):\n        \"\"\"Transform box to the axis-aligned or rotated iou loss format.\n\n        Args:\n            bbox (Tensor): 3D box of shape (N, 6) or (N, 7).\n\n        Returns:\n            Tensor: Transformed 3D box of shape (N, 6) or (N, 7).\n        \"\"\"\n        # rotated iou loss accepts (x, y, z, w, h, l, heading)\n        if bbox.shape[-1] != 6:\n            return bbox\n\n        # axis-aligned case: x, y, z, w, h, l -> x1, y1, z1, x2, y2, z2\n        return torch.stack(\n            (bbox[..., 0] - bbox[..., 3] / 2, bbox[..., 1] - bbox[..., 4] / 2,\n             bbox[..., 2] - bbox[..., 5] / 2, bbox[..., 0] + bbox[..., 3] / 2,\n             bbox[..., 1] + bbox[..., 4] / 2, bbox[..., 2] + bbox[..., 5] / 2),\n            dim=-1)\n\n    @staticmethod\n    def _bbox_pred_to_bbox(points, bbox_pred):\n        \"\"\"Transform predicted bbox parameters to bbox.\n\n        Args:\n            points (Tensor): Final locations of shape (N, 3)\n            bbox_pred (Tensor): Predicted bbox parameters of shape (N, 6)\n                or (N, 8).\n\n        Returns:\n            Tensor: Transformed 3D box of shape (N, 6) or (N, 7).\n        \"\"\"\n        if bbox_pred.shape[0] == 0:\n            return bbox_pred\n\n        x_center = points[:, 0] + (bbox_pred[:, 1] - bbox_pred[:, 0]) / 2\n        y_center = points[:, 1] + (bbox_pred[:, 3] - bbox_pred[:, 2]) / 2\n        z_center = points[:, 2] + (bbox_pred[:, 5] - bbox_pred[:, 4]) / 2\n\n        # dx_min, dx_max, dy_min, dy_max, dz_min, dz_max -> x, y, z, w, l, h\n        base_bbox = torch.stack([\n            
x_center,\n            y_center,\n            z_center,\n            bbox_pred[:, 0] + bbox_pred[:, 1],\n            bbox_pred[:, 2] + bbox_pred[:, 3],\n            bbox_pred[:, 4] + bbox_pred[:, 5],\n        ], -1)\n\n        # axis-aligned case\n        if bbox_pred.shape[1] == 6:\n            return base_bbox\n\n        # rotated case: ..., sin(2a)ln(q), cos(2a)ln(q)\n        scale = bbox_pred[:, 0] + bbox_pred[:, 1] + \\\n            bbox_pred[:, 2] + bbox_pred[:, 3]\n        q = torch.exp(\n            torch.sqrt(\n                torch.pow(bbox_pred[:, 6], 2) + torch.pow(bbox_pred[:, 7], 2)))\n        alpha = 0.5 * torch.atan2(bbox_pred[:, 6], bbox_pred[:, 7])\n        return torch.stack(\n            (x_center, y_center, z_center, scale / (1 + q), scale /\n             (1 + q) * q, bbox_pred[:, 5] + bbox_pred[:, 4], alpha),\n            dim=-1)\n\n    @staticmethod\n    def _get_face_distances(points, boxes):\n        \"\"\"Calculate distances from point to box faces.\n\n        Args:\n            points (Tensor): Final locations of shape (N_points, N_boxes, 3).\n            boxes (Tensor): 3D boxes of shape (N_points, N_boxes, 7)\n\n        Returns:\n            Tensor: Face distances of shape (N_points, N_boxes, 6),\n                (dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).\n        \"\"\"\n        shift = torch.stack(\n            (points[..., 0] - boxes[..., 0], points[..., 1] - boxes[..., 1],\n             points[..., 2] - boxes[..., 2]),\n            dim=-1).permute(1, 0, 2)\n        shift = rotation_3d_in_axis(\n            shift, -boxes[0, :, 6], axis=2).permute(1, 0, 2)\n        centers = boxes[..., :3] + shift\n        dx_min = centers[..., 0] - boxes[..., 0] + boxes[..., 3] / 2\n        dx_max = boxes[..., 0] + boxes[..., 3] / 2 - centers[..., 0]\n        dy_min = centers[..., 1] - boxes[..., 1] + boxes[..., 4] / 2\n        dy_max = boxes[..., 1] + boxes[..., 4] / 2 - centers[..., 1]\n        dz_min = centers[..., 2] - boxes[..., 2] + boxes[..., 5] / 2\n        dz_max = boxes[..., 2] + boxes[..., 5] / 2 - centers[..., 2]\n        return torch.stack((dx_min, dx_max, dy_min, dy_max, dz_min, dz_max),\n                           dim=-1)\n\n    @staticmethod\n    def _get_centerness(face_distances):\n        \"\"\"Compute point centerness w.r.t containing box.\n\n        Args:\n            face_distances (Tensor): Face distances of shape (B, N, 6),\n                (dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).\n\n        Returns:\n            Tensor: Centerness of shape (B, N).\n        \"\"\"\n        x_dims = face_distances[..., [0, 1]]\n        y_dims = face_distances[..., [2, 3]]\n        z_dims = face_distances[..., [4, 5]]\n        centerness_targets = x_dims.min(dim=-1)[0] / x_dims.max(dim=-1)[0] * \\\n            y_dims.min(dim=-1)[0] / y_dims.max(dim=-1)[0] * \\\n            z_dims.min(dim=-1)[0] / z_dims.max(dim=-1)[0]\n        return torch.sqrt(centerness_targets)\n\n    @torch.no_grad()\n    def _get_targets(self, points, gt_bboxes, gt_labels):\n        \"\"\"Compute targets for final locations for a single scene.\n\n        Args:\n            points (list[Tensor]): Final locations for all levels.\n            gt_bboxes (BaseInstance3DBoxes): Ground truth boxes.\n            gt_labels (Tensor): Ground truth labels.\n\n        Returns:\n            tuple[Tensor]: Centerness, bbox and classification\n                targets for all locations.\n        \"\"\"\n        float_max = points[0].new_tensor(1e8)\n        n_levels = len(points)\n        levels = 
torch.cat([\n            points[i].new_tensor(i).expand(len(points[i]))\n            for i in range(len(points))\n        ])\n        points = torch.cat(points)\n        gt_bboxes = gt_bboxes.to(points.device)\n        n_points = len(points)\n        n_boxes = len(gt_bboxes)\n        volumes = gt_bboxes.volume.unsqueeze(0).expand(n_points, n_boxes)\n\n        # condition 1: point inside box\n        boxes = torch.cat((gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),\n                          dim=1)\n        boxes = boxes.expand(n_points, n_boxes, 7)\n        points = points.unsqueeze(1).expand(n_points, n_boxes, 3)\n        face_distances = self._get_face_distances(points, boxes)\n        inside_box_condition = face_distances.min(dim=-1).values > 0\n\n        # condition 2: positive points per level >= limit\n        # calculate positive points per scale\n        n_pos_points_per_level = []\n        for i in range(n_levels):\n            n_pos_points_per_level.append(\n                torch.sum(inside_box_condition[levels == i], dim=0))\n        # find best level\n        n_pos_points_per_level = torch.stack(n_pos_points_per_level, dim=0)\n        lower_limit_mask = n_pos_points_per_level < self.pts_assign_threshold\n        lower_index = torch.argmax(lower_limit_mask.int(), dim=0) - 1\n        lower_index = torch.where(lower_index < 0, 0, lower_index)\n        all_upper_limit_mask = torch.all(\n            torch.logical_not(lower_limit_mask), dim=0)\n        best_level = torch.where(all_upper_limit_mask, n_levels - 1,\n                                 lower_index)\n        # keep only points with best level\n        best_level = best_level.expand(n_points, n_boxes)\n        levels = torch.unsqueeze(levels, 1).expand(n_points, n_boxes)\n        level_condition = best_level == levels\n\n        # condition 3: limit topk points per box by centerness\n        centerness = self._get_centerness(face_distances)\n        centerness = torch.where(inside_box_condition, centerness,\n                                 torch.ones_like(centerness) * -1)\n        centerness = torch.where(level_condition, centerness,\n                                 torch.ones_like(centerness) * -1)\n        top_centerness = torch.topk(\n            centerness,\n            min(self.pts_center_threshold + 1, len(centerness)),\n            dim=0).values[-1]\n        topk_condition = centerness > top_centerness.unsqueeze(0)\n\n        # condition 4: min volume box per point\n        volumes = torch.where(inside_box_condition, volumes, float_max)\n        volumes = torch.where(level_condition, volumes, float_max)\n        volumes = torch.where(topk_condition, volumes, float_max)\n        min_volumes, min_inds = volumes.min(dim=1)\n\n        center_targets = centerness[torch.arange(n_points), min_inds]\n        bbox_targets = boxes[torch.arange(n_points), min_inds]\n        if not gt_bboxes.with_yaw:\n            bbox_targets = bbox_targets[:, :-1]\n        cls_targets = gt_labels[min_inds]\n        cls_targets = torch.where(min_volumes == float_max, -1, cls_targets)\n        return center_targets, bbox_targets, cls_targets\n\n    def _single_scene_multiclass_nms(self, bboxes, scores, input_meta):\n        \"\"\"Multi-class nms for a single scene.\n\n        Args:\n            bboxes (Tensor): Predicted boxes of shape (N_boxes, 6) or\n                (N_boxes, 7).\n            scores (Tensor): Predicted scores of shape (N_boxes, N_classes).\n            input_meta (dict): Scene meta data.\n\n        Returns:\n            
tuple[Tensor]: Predicted bboxes, scores and labels.\n        \"\"\"\n        n_classes = scores.shape[1]\n        with_yaw = bboxes.shape[1] == 7\n        nms_bboxes, nms_scores, nms_labels = [], [], []\n        for i in range(n_classes):\n            ids = scores[:, i] > self.test_cfg.score_thr\n            if not ids.any():\n                continue\n\n            class_scores = scores[ids, i]\n            class_bboxes = bboxes[ids]\n            if with_yaw:\n                nms_function = nms3d\n            else:\n                class_bboxes = torch.cat(\n                    (class_bboxes, torch.zeros_like(class_bboxes[:, :1])),\n                    dim=1)\n                nms_function = nms3d_normal\n\n            nms_ids = nms_function(class_bboxes, class_scores,\n                                   self.test_cfg.iou_thr)\n            nms_bboxes.append(class_bboxes[nms_ids])\n            nms_scores.append(class_scores[nms_ids])\n            nms_labels.append(\n                bboxes.new_full(\n                    class_scores[nms_ids].shape, i, dtype=torch.long))\n\n        if len(nms_bboxes):\n            nms_bboxes = torch.cat(nms_bboxes, dim=0)\n            nms_scores = torch.cat(nms_scores, dim=0)\n            nms_labels = torch.cat(nms_labels, dim=0)\n        else:\n            nms_bboxes = bboxes.new_zeros((0, bboxes.shape[1]))\n            nms_scores = bboxes.new_zeros((0, ))\n            nms_labels = bboxes.new_zeros((0, ))\n\n        if with_yaw:\n            box_dim = 7\n        else:\n            box_dim = 6\n            nms_bboxes = nms_bboxes[:, :6]\n        nms_bboxes = input_meta['box_type_3d'](\n            nms_bboxes,\n            box_dim=box_dim,\n            with_yaw=with_yaw,\n            origin=(.5, .5, .5))\n\n        return nms_bboxes, nms_scores, nms_labels\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/fcos_mono3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom logging import warning\n\nimport numpy as np\nimport torch\nfrom mmcv.cnn import Scale, normal_init\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\n\nfrom mmdet3d.core import (box3d_multiclass_nms, limit_period, points_img2cam,\n                          xywhr2xyxyr)\nfrom mmdet.core import multi_apply\nfrom mmdet.core.bbox.builder import build_bbox_coder\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_mono3d_head import AnchorFreeMono3DHead\n\nINF = 1e8\n\n\n@HEADS.register_module()\nclass FCOSMono3DHead(AnchorFreeMono3DHead):\n    \"\"\"Anchor-free head used in FCOS3D.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        regress_ranges (tuple[tuple[int, int]], optional): Regress range of multiple\n            level points.\n        center_sampling (bool, optional): If true, use center sampling. Default: True.\n        center_sample_radius (float, optional): Radius of center sampling. Default: 1.5.\n        norm_on_bbox (bool, optional): If true, normalize the regression targets\n            with FPN strides. Default: True.\n        centerness_on_reg (bool, optional): If true, position centerness on the\n            regress branch. Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042.\n            Default: True.\n        centerness_alpha (int, optional): Parameter used to adjust the intensity\n            attenuation from the center to the periphery. Default: 2.5.\n        loss_cls (dict, optional): Config of classification loss.\n        loss_bbox (dict, optional): Config of localization loss.\n        loss_dir (dict, optional): Config of direction classification loss.\n        loss_attr (dict, optional): Config of attribute classification loss.\n        loss_centerness (dict, optional): Config of centerness loss.\n        norm_cfg (dict, optional): dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).\n        centerness_branch (tuple[int], optional): Channels for centerness branch.\n            Default: (64, ).\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 regress_ranges=((-1, 48), (48, 96), (96, 192), (192, 384),\n                                 (384, INF)),\n                 center_sampling=True,\n                 center_sample_radius=1.5,\n                 norm_on_bbox=True,\n                 centerness_on_reg=True,\n                 centerness_alpha=2.5,\n                 loss_cls=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n                 loss_dir=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     loss_weight=1.0),\n                 loss_attr=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     loss_weight=1.0),\n                 loss_centerness=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 bbox_coder=dict(type='FCOS3DBBoxCoder', 
code_size=9),\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 centerness_branch=(64, ),\n                 init_cfg=None,\n                 **kwargs):\n        self.regress_ranges = regress_ranges\n        self.center_sampling = center_sampling\n        self.center_sample_radius = center_sample_radius\n        self.norm_on_bbox = norm_on_bbox\n        self.centerness_on_reg = centerness_on_reg\n        self.centerness_alpha = centerness_alpha\n        self.centerness_branch = centerness_branch\n        super().__init__(\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            loss_dir=loss_dir,\n            loss_attr=loss_attr,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            **kwargs)\n        self.loss_centerness = build_loss(loss_centerness)\n        bbox_coder['code_size'] = self.bbox_code_size\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        super()._init_layers()\n        self.conv_centerness_prev = self._init_branch(\n            conv_channels=self.centerness_branch,\n            conv_strides=(1, ) * len(self.centerness_branch))\n        self.conv_centerness = nn.Conv2d(self.centerness_branch[-1], 1, 1)\n        self.scale_dim = 3  # only for offset, depth and size regression\n        self.scales = nn.ModuleList([\n            nn.ModuleList([Scale(1.0) for _ in range(self.scale_dim)])\n            for _ in self.strides\n        ])\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\n\n        We currently still use the customized init_weights because the default\n        init of DCN triggered by the init_cfg will init conv_offset.weight,\n        which mistakenly affects the training stability.\n        \"\"\"\n        super().init_weights()\n        for m in self.conv_centerness_prev:\n            if isinstance(m.conv, nn.Conv2d):\n                normal_init(m.conv, std=0.01)\n        normal_init(self.conv_centerness, std=0.01)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple:\n                cls_scores (list[Tensor]): Box scores for each scale level,\n                    each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * bbox_code_size.\n                dir_cls_preds (list[Tensor]): Box scores for direction class\n                    predictions on each scale level, each is a 4D-tensor,\n                    the channel number is num_points * 2. 
(bin = 2).\n                attr_preds (list[Tensor]): Attribute scores for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * num_attrs.\n                centernesses (list[Tensor]): Centerness for each scale level,\n                    each is a 4D-tensor, the channel number is num_points * 1.\n        \"\"\"\n        # Note: we use [:5] to filter feats and only return predictions\n        return multi_apply(self.forward_single, feats, self.scales,\n                           self.strides)[:5]\n\n    def forward_single(self, x, scale, stride):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n            stride (int): The corresponding stride for feature maps, only\n                used to normalize the bbox prediction when self.norm_on_bbox\n                is True.\n\n        Returns:\n            tuple: scores for each class, bbox and direction class\n                predictions, centerness predictions of input feature maps.\n        \"\"\"\n        cls_score, bbox_pred, dir_cls_pred, attr_pred, cls_feat, reg_feat = \\\n            super().forward_single(x)\n\n        if self.centerness_on_reg:\n            clone_reg_feat = reg_feat.clone()\n            for conv_centerness_prev_layer in self.conv_centerness_prev:\n                clone_reg_feat = conv_centerness_prev_layer(clone_reg_feat)\n            centerness = self.conv_centerness(clone_reg_feat)\n        else:\n            clone_cls_feat = cls_feat.clone()\n            for conv_centerness_prev_layer in self.conv_centerness_prev:\n                clone_cls_feat = conv_centerness_prev_layer(clone_cls_feat)\n            centerness = self.conv_centerness(clone_cls_feat)\n\n        bbox_pred = self.bbox_coder.decode(bbox_pred, scale, stride,\n                                           self.training, cls_score)\n\n        return cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, \\\n            cls_feat, reg_feat\n\n    @staticmethod\n    def add_sin_difference(boxes1, boxes2):\n        \"\"\"Convert the rotation difference to difference in sine function.\n\n        Args:\n            boxes1 (torch.Tensor): Original Boxes in shape (NxC), where C>=7\n                and the 7th dimension is rotation dimension.\n            boxes2 (torch.Tensor): Target boxes in shape (NxC), where C>=7 and\n                the 7th dimension is rotation dimension.\n\n        Returns:\n            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th\n                dimensions are changed.\n        \"\"\"\n        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(\n            boxes2[..., 6:7])\n        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(boxes2[...,\n                                                                         6:7])\n        boxes1 = torch.cat(\n            [boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)\n        boxes2 = torch.cat([boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]],\n                           dim=-1)\n        return boxes1, boxes2\n\n    @staticmethod\n    def get_direction_target(reg_targets,\n                             dir_offset=0,\n                             dir_limit_offset=0.0,\n                             num_bins=2,\n                             one_hot=True):\n        \"\"\"Encode 
direction to 0 ~ num_bins-1.\n\n        Args:\n            reg_targets (torch.Tensor): Bbox regression targets.\n            dir_offset (int, optional): Direction offset. Default to 0.\n            dir_limit_offset (float, optional): Offset to set the direction\n                range. Default to 0.0.\n            num_bins (int, optional): Number of bins to divide 2*PI.\n                Default to 2.\n            one_hot (bool, optional): Whether to encode as one hot.\n                Default to True.\n\n        Returns:\n            torch.Tensor: Encoded direction targets.\n        \"\"\"\n        rot_gt = reg_targets[..., 6]\n        offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset,\n                                  2 * np.pi)\n        dir_cls_targets = torch.floor(offset_rot /\n                                      (2 * np.pi / num_bins)).long()\n        dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)\n        if one_hot:\n            dir_targets = torch.zeros(\n                *list(dir_cls_targets.shape),\n                num_bins,\n                dtype=reg_targets.dtype,\n                device=dir_cls_targets.device)\n            dir_targets.scatter_(dir_cls_targets.unsqueeze(dim=-1).long(), 1.0)\n            dir_cls_targets = dir_targets\n        return dir_cls_targets\n\n    @force_fp32(\n        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds', 'attr_preds',\n                  'centernesses'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             attr_preds,\n             centernesses,\n             gt_bboxes,\n             gt_labels,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             centers2d,\n             depths,\n             attr_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * bbox_code_size.\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. 
(bin = 2)\n            attr_preds (list[Tensor]): Attribute scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_attrs.\n            centernesses (list[Tensor]): Centerness for each scale level, each\n                is a 4D-tensor, the channel number is num_points * 1.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_3d (list[Tensor]): 3D boxes ground truth with shape of\n                (num_gts, code_size).\n            gt_labels_3d (list[Tensor]): same as gt_labels\n            centers2d (list[Tensor]): 2D centers on the image with shape of\n                (num_gts, 2).\n            depths (list[Tensor]): Depth ground truth with shape of\n                (num_gts, ).\n            attr_labels (list[Tensor]): Attributes indices of each box.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(centernesses) == len(\n            attr_preds)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,\n                                           bbox_preds[0].device)\n        labels_3d, bbox_targets_3d, centerness_targets, attr_targets = \\\n            self.get_targets(\n                all_level_points, gt_bboxes, gt_labels, gt_bboxes_3d,\n                gt_labels_3d, centers2d, depths, attr_labels)\n\n        num_imgs = cls_scores[0].size(0)\n        # flatten cls_scores, bbox_preds, dir_cls_preds and centerness\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(-1, sum(self.group_reg_dims))\n            for bbox_pred in bbox_preds\n        ]\n        flatten_dir_cls_preds = [\n            dir_cls_pred.permute(0, 2, 3, 1).reshape(-1, 2)\n            for dir_cls_pred in dir_cls_preds\n        ]\n        flatten_centerness = [\n            centerness.permute(0, 2, 3, 1).reshape(-1)\n            for centerness in centernesses\n        ]\n        flatten_cls_scores = torch.cat(flatten_cls_scores)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds)\n        flatten_dir_cls_preds = torch.cat(flatten_dir_cls_preds)\n        flatten_centerness = torch.cat(flatten_centerness)\n        flatten_labels_3d = torch.cat(labels_3d)\n        flatten_bbox_targets_3d = torch.cat(bbox_targets_3d)\n        flatten_centerness_targets = torch.cat(centerness_targets)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((flatten_labels_3d >= 0)\n                    & (flatten_labels_3d < bg_class_ind)).nonzero().reshape(-1)\n        num_pos = len(pos_inds)\n\n        loss_cls = self.loss_cls(\n            flatten_cls_scores,\n            flatten_labels_3d,\n            avg_factor=num_pos + num_imgs)  # avoid num_pos is 
0\n\n        pos_bbox_preds = flatten_bbox_preds[pos_inds]\n        pos_dir_cls_preds = flatten_dir_cls_preds[pos_inds]\n        pos_centerness = flatten_centerness[pos_inds]\n\n        if self.pred_attrs:\n            flatten_attr_preds = [\n                attr_pred.permute(0, 2, 3, 1).reshape(-1, self.num_attrs)\n                for attr_pred in attr_preds\n            ]\n            flatten_attr_preds = torch.cat(flatten_attr_preds)\n            flatten_attr_targets = torch.cat(attr_targets)\n            pos_attr_preds = flatten_attr_preds[pos_inds]\n\n        if num_pos > 0:\n            pos_bbox_targets_3d = flatten_bbox_targets_3d[pos_inds]\n            pos_centerness_targets = flatten_centerness_targets[pos_inds]\n            if self.pred_attrs:\n                pos_attr_targets = flatten_attr_targets[pos_inds]\n            bbox_weights = pos_centerness_targets.new_ones(\n                len(pos_centerness_targets), sum(self.group_reg_dims))\n            equal_weights = pos_centerness_targets.new_ones(\n                pos_centerness_targets.shape)\n\n            code_weight = self.train_cfg.get('code_weight', None)\n            if code_weight:\n                assert len(code_weight) == sum(self.group_reg_dims)\n                bbox_weights = bbox_weights * bbox_weights.new_tensor(\n                    code_weight)\n\n            if self.use_direction_classifier:\n                pos_dir_cls_targets = self.get_direction_target(\n                    pos_bbox_targets_3d,\n                    self.dir_offset,\n                    self.dir_limit_offset,\n                    one_hot=False)\n\n            if self.diff_rad_by_sin:\n                pos_bbox_preds, pos_bbox_targets_3d = self.add_sin_difference(\n                    pos_bbox_preds, pos_bbox_targets_3d)\n\n            loss_offset = self.loss_bbox(\n                pos_bbox_preds[:, :2],\n                pos_bbox_targets_3d[:, :2],\n                weight=bbox_weights[:, :2],\n                avg_factor=equal_weights.sum())\n            loss_depth = self.loss_bbox(\n                pos_bbox_preds[:, 2],\n                pos_bbox_targets_3d[:, 2],\n                weight=bbox_weights[:, 2],\n                avg_factor=equal_weights.sum())\n            loss_size = self.loss_bbox(\n                pos_bbox_preds[:, 3:6],\n                pos_bbox_targets_3d[:, 3:6],\n                weight=bbox_weights[:, 3:6],\n                avg_factor=equal_weights.sum())\n            loss_rotsin = self.loss_bbox(\n                pos_bbox_preds[:, 6],\n                pos_bbox_targets_3d[:, 6],\n                weight=bbox_weights[:, 6],\n                avg_factor=equal_weights.sum())\n            loss_velo = None\n            if self.pred_velo:\n                loss_velo = self.loss_bbox(\n                    pos_bbox_preds[:, 7:9],\n                    pos_bbox_targets_3d[:, 7:9],\n                    weight=bbox_weights[:, 7:9],\n                    avg_factor=equal_weights.sum())\n\n            loss_centerness = self.loss_centerness(pos_centerness,\n                                                   pos_centerness_targets)\n\n            # direction classification loss\n            loss_dir = None\n            # TODO: add more check for use_direction_classifier\n            if self.use_direction_classifier:\n                loss_dir = self.loss_dir(\n                    pos_dir_cls_preds,\n                    pos_dir_cls_targets,\n                    equal_weights,\n                    avg_factor=equal_weights.sum())\n\n            # 
attribute classification loss\n            loss_attr = None\n            if self.pred_attrs:\n                loss_attr = self.loss_attr(\n                    pos_attr_preds,\n                    pos_attr_targets,\n                    pos_centerness_targets,\n                    avg_factor=pos_centerness_targets.sum())\n\n        else:\n            # need absolute due to possible negative delta x/y\n            loss_offset = pos_bbox_preds[:, :2].sum()\n            loss_depth = pos_bbox_preds[:, 2].sum()\n            loss_size = pos_bbox_preds[:, 3:6].sum()\n            loss_rotsin = pos_bbox_preds[:, 6].sum()\n            loss_velo = None\n            if self.pred_velo:\n                loss_velo = pos_bbox_preds[:, 7:9].sum()\n            loss_centerness = pos_centerness.sum()\n            loss_dir = None\n            if self.use_direction_classifier:\n                loss_dir = pos_dir_cls_preds.sum()\n            loss_attr = None\n            if self.pred_attrs:\n                loss_attr = pos_attr_preds.sum()\n\n        loss_dict = dict(\n            loss_cls=loss_cls,\n            loss_offset=loss_offset,\n            loss_depth=loss_depth,\n            loss_size=loss_size,\n            loss_rotsin=loss_rotsin,\n            loss_centerness=loss_centerness)\n\n        if loss_velo is not None:\n            loss_dict['loss_velo'] = loss_velo\n\n        if loss_dir is not None:\n            loss_dict['loss_dir'] = loss_dir\n\n        if loss_attr is not None:\n            loss_dict['loss_attr'] = loss_attr\n\n        return loss_dict\n\n    @force_fp32(\n        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds', 'attr_preds',\n                  'centernesses'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   attr_preds,\n                   centernesses,\n                   img_metas,\n                   cfg=None,\n                   rescale=None):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_points * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_points * 4, H, W)\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. (bin = 2)\n            attr_preds (list[Tensor]): Attribute scores for each scale level\n                Has shape (N, num_points * num_attrs, H, W)\n            centernesses (list[Tensor]): Centerness for each scale level with\n                shape (N, num_points * 1, H, W)\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used\n            rescale (bool): If True, return boxes in original image space\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is an (n, 5) tensor, where the first 4 columns\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1. 
The second item is a\n                (n,) tensor where each item is the predicted class label of\n                the corresponding box.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \\\n            len(centernesses) == len(attr_preds)\n        num_levels = len(cls_scores)\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,\n                                      bbox_preds[0].device)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            if self.use_direction_classifier:\n                dir_cls_pred_list = [\n                    dir_cls_preds[i][img_id].detach()\n                    for i in range(num_levels)\n                ]\n            else:\n                dir_cls_pred_list = [\n                    cls_scores[i][img_id].new_full(\n                        [2, *cls_scores[i][img_id].shape[1:]], 0).detach()\n                    for i in range(num_levels)\n                ]\n            if self.pred_attrs:\n                attr_pred_list = [\n                    attr_preds[i][img_id].detach() for i in range(num_levels)\n                ]\n            else:\n                attr_pred_list = [\n                    cls_scores[i][img_id].new_full(\n                        [self.num_attrs, *cls_scores[i][img_id].shape[1:]],\n                        self.attr_background_label).detach()\n                    for i in range(num_levels)\n                ]\n            centerness_pred_list = [\n                centernesses[i][img_id].detach() for i in range(num_levels)\n            ]\n            input_meta = img_metas[img_id]\n            det_bboxes = self._get_bboxes_single(\n                cls_score_list, bbox_pred_list, dir_cls_pred_list,\n                attr_pred_list, centerness_pred_list, mlvl_points, input_meta,\n                cfg, rescale)\n            result_list.append(det_bboxes)\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_scores,\n                           bbox_preds,\n                           dir_cls_preds,\n                           attr_preds,\n                           centernesses,\n                           mlvl_points,\n                           input_meta,\n                           cfg,\n                           rescale=False):\n        \"\"\"Transform outputs for a single batch item into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for a single scale level\n                Has shape (num_points * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for a single scale\n                level with shape (num_points * bbox_code_size, H, W).\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on a single scale level with shape\n                (num_points * 2, H, W)\n            attr_preds (list[Tensor]): Attribute scores for each scale level\n                Has shape (N, num_points * num_attrs, H, W)\n            centernesses (list[Tensor]): Centerness for a single scale level\n                with shape (num_points, H, W).\n            mlvl_points (list[Tensor]): Box reference for 
a single scale level\n                with shape (num_total_points, 2).\n            input_meta (dict): Metadata of input image.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n\n        Returns:\n            tuples[Tensor]: Predicted 3D boxes, scores, labels and attributes.\n        \"\"\"\n        view = np.array(input_meta['cam2img'])\n        scale_factor = input_meta['scale_factor']\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)\n        mlvl_centers2d = []\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_dir_scores = []\n        mlvl_attr_scores = []\n        mlvl_centerness = []\n\n        for cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, \\\n                points in zip(cls_scores, bbox_preds, dir_cls_preds,\n                              attr_preds, centernesses, mlvl_points):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            scores = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels).sigmoid()\n            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n            attr_pred = attr_pred.permute(1, 2, 0).reshape(-1, self.num_attrs)\n            attr_score = torch.max(attr_pred, dim=-1)[1]\n            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()\n\n            bbox_pred = bbox_pred.permute(1, 2,\n                                          0).reshape(-1,\n                                                     sum(self.group_reg_dims))\n            bbox_pred = bbox_pred[:, :self.bbox_code_size]\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                max_scores, _ = (scores * centerness[:, None]).max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                points = points[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                dir_cls_pred = dir_cls_pred[topk_inds, :]\n                centerness = centerness[topk_inds]\n                dir_cls_score = dir_cls_score[topk_inds]\n                attr_score = attr_score[topk_inds]\n            # change the offset to actual center predictions\n            bbox_pred[:, :2] = points - bbox_pred[:, :2]\n            if rescale:\n                bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor)\n            pred_center2d = bbox_pred[:, :3].clone()\n            bbox_pred[:, :3] = points_img2cam(bbox_pred[:, :3], view)\n            mlvl_centers2d.append(pred_center2d)\n            mlvl_bboxes.append(bbox_pred)\n            mlvl_scores.append(scores)\n            mlvl_dir_scores.append(dir_cls_score)\n            mlvl_attr_scores.append(attr_score)\n            mlvl_centerness.append(centerness)\n\n        mlvl_centers2d = torch.cat(mlvl_centers2d)\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n\n        # change local yaw to global yaw for 3D nms\n        cam2img = mlvl_centers2d.new_zeros((4, 4))\n        cam2img[:view.shape[0], :view.shape[1]] = \\\n            mlvl_centers2d.new_tensor(view)\n        mlvl_bboxes = self.bbox_coder.decode_yaw(mlvl_bboxes, mlvl_centers2d,\n                                                
 mlvl_dir_scores,\n                                                 self.dir_offset, cam2img)\n\n        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.bbox_code_size,\n            origin=(0.5, 0.5, 0.5)).bev)\n\n        mlvl_scores = torch.cat(mlvl_scores)\n        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0\n        # BG cat_id: num_class\n        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n        mlvl_attr_scores = torch.cat(mlvl_attr_scores)\n        mlvl_centerness = torch.cat(mlvl_centerness)\n        # no scale_factors in box3d_multiclass_nms\n        # Then we multiply it from outside\n        mlvl_nms_scores = mlvl_scores * mlvl_centerness[:, None]\n        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                       mlvl_nms_scores, cfg.score_thr,\n                                       cfg.max_per_img, cfg, mlvl_dir_scores,\n                                       mlvl_attr_scores)\n        bboxes, scores, labels, dir_scores, attrs = results\n        attrs = attrs.to(labels.dtype)  # change data type to int\n        bboxes = input_meta['box_type_3d'](\n            bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))\n        # Note that the predictions use origin (0.5, 0.5, 0.5)\n        # Due to the ground truth centers2d are the gravity center of objects\n        # v0.10.0 fix inplace operation to the input tensor of cam_box3d\n        # So here we also need to add origin=(0.5, 0.5, 0.5)\n        if not self.pred_attrs:\n            attrs = None\n\n        return bboxes, scores, labels, attrs\n\n    @staticmethod\n    def pts2Dto3D(points, view):\n        \"\"\"\n        Args:\n            points (torch.Tensor): points in 2D images, [N, 3],\n                3 corresponds with x, y in the image and depth.\n            view (np.ndarray): camera intrinsic, [3, 3]\n\n        Returns:\n            torch.Tensor: points in 3D space. [N, 3],\n                3 corresponds with x, y, z in 3D space.\n        \"\"\"\n        warning.warn('DeprecationWarning: This static method has been moved '\n                     'out of this class to mmdet3d/core. 
The function '\n                     'pts2Dto3D will be deprecated.')\n\n        assert view.shape[0] <= 4\n        assert view.shape[1] <= 4\n        assert points.shape[1] == 3\n\n        points2D = points[:, :2]\n        depths = points[:, 2].view(-1, 1)\n        unnorm_points2D = torch.cat([points2D * depths, depths], dim=1)\n\n        viewpad = torch.eye(4, dtype=points2D.dtype, device=points2D.device)\n        viewpad[:view.shape[0], :view.shape[1]] = points2D.new_tensor(view)\n        inv_viewpad = torch.inverse(viewpad).transpose(0, 1)\n\n        # Do operation in homogeneous coordinates.\n        nbr_points = unnorm_points2D.shape[0]\n        homo_points2D = torch.cat(\n            [unnorm_points2D,\n             points2D.new_ones((nbr_points, 1))], dim=1)\n        points3D = torch.mm(homo_points2D, inv_viewpad)[:, :3]\n\n        return points3D\n\n    def _get_points_single(self,\n                           featmap_size,\n                           stride,\n                           dtype,\n                           device,\n                           flatten=False):\n        \"\"\"Get points according to feature map sizes.\"\"\"\n        y, x = super()._get_points_single(featmap_size, stride, dtype, device)\n        points = torch.stack((x.reshape(-1) * stride, y.reshape(-1) * stride),\n                             dim=-1) + stride // 2\n        return points\n\n    def get_targets(self, points, gt_bboxes_list, gt_labels_list,\n                    gt_bboxes_3d_list, gt_labels_3d_list, centers2d_list,\n                    depths_list, attr_labels_list):\n        \"\"\"Compute regression, classification and centerss targets for points\n        in multiple images.\n\n        Args:\n            points (list[Tensor]): Points of each fpn level, each has shape\n                (num_points, 2).\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n            gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each\n                image, each has shape (num_gt, bbox_code_size).\n            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each\n                box, each has shape (num_gt,).\n            centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,\n                each has shape (num_gt, 2).\n            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D\n                image, each has shape (num_gt, 1).\n            attr_labels_list (list[Tensor]): Attribute labels of each box,\n                each has shape (num_gt,).\n\n        Returns:\n            tuple:\n                concat_lvl_labels (list[Tensor]): Labels of each level.\n                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each\n                    level.\n        \"\"\"\n        assert len(points) == len(self.regress_ranges)\n        num_levels = len(points)\n        # expand regress ranges to align with points\n        expanded_regress_ranges = [\n            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(\n                points[i]) for i in range(num_levels)\n        ]\n        # concat all levels points and regress ranges\n        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)\n        concat_points = torch.cat(points, dim=0)\n\n        # the number of points per img, per lvl\n        num_points = [center.size(0) for 
center in points]\n\n        if attr_labels_list is None:\n            attr_labels_list = [\n                gt_labels.new_full(gt_labels.shape, self.attr_background_label)\n                for gt_labels in gt_labels_list\n            ]\n\n        # get labels and bbox_targets of each image\n        _, _, labels_3d_list, bbox_targets_3d_list, centerness_targets_list, \\\n            attr_targets_list = multi_apply(\n                self._get_target_single,\n                gt_bboxes_list,\n                gt_labels_list,\n                gt_bboxes_3d_list,\n                gt_labels_3d_list,\n                centers2d_list,\n                depths_list,\n                attr_labels_list,\n                points=concat_points,\n                regress_ranges=concat_regress_ranges,\n                num_points_per_lvl=num_points)\n\n        # split to per img, per level\n        labels_3d_list = [\n            labels_3d.split(num_points, 0) for labels_3d in labels_3d_list\n        ]\n        bbox_targets_3d_list = [\n            bbox_targets_3d.split(num_points, 0)\n            for bbox_targets_3d in bbox_targets_3d_list\n        ]\n        centerness_targets_list = [\n            centerness_targets.split(num_points, 0)\n            for centerness_targets in centerness_targets_list\n        ]\n        attr_targets_list = [\n            attr_targets.split(num_points, 0)\n            for attr_targets in attr_targets_list\n        ]\n\n        # concat per level image\n        concat_lvl_labels_3d = []\n        concat_lvl_bbox_targets_3d = []\n        concat_lvl_centerness_targets = []\n        concat_lvl_attr_targets = []\n        for i in range(num_levels):\n            concat_lvl_labels_3d.append(\n                torch.cat([labels[i] for labels in labels_3d_list]))\n            concat_lvl_centerness_targets.append(\n                torch.cat([\n                    centerness_targets[i]\n                    for centerness_targets in centerness_targets_list\n                ]))\n            bbox_targets_3d = torch.cat([\n                bbox_targets_3d[i] for bbox_targets_3d in bbox_targets_3d_list\n            ])\n            concat_lvl_attr_targets.append(\n                torch.cat(\n                    [attr_targets[i] for attr_targets in attr_targets_list]))\n            if self.norm_on_bbox:\n                bbox_targets_3d[:, :\n                                2] = bbox_targets_3d[:, :2] / self.strides[i]\n            concat_lvl_bbox_targets_3d.append(bbox_targets_3d)\n        return concat_lvl_labels_3d, concat_lvl_bbox_targets_3d, \\\n            concat_lvl_centerness_targets, concat_lvl_attr_targets\n\n    def _get_target_single(self, gt_bboxes, gt_labels, gt_bboxes_3d,\n                           gt_labels_3d, centers2d, depths, attr_labels,\n                           points, regress_ranges, num_points_per_lvl):\n        \"\"\"Compute regression and classification targets for a single image.\"\"\"\n        num_points = points.size(0)\n        num_gts = gt_labels.size(0)\n        if not isinstance(gt_bboxes_3d, torch.Tensor):\n            gt_bboxes_3d = gt_bboxes_3d.tensor.to(gt_bboxes.device)\n        if num_gts == 0:\n            return gt_labels.new_full((num_points,), self.background_label), \\\n                   gt_bboxes.new_zeros((num_points, 4)), \\\n                   gt_labels_3d.new_full(\n                       (num_points,), self.background_label), \\\n                   gt_bboxes_3d.new_zeros((num_points, self.bbox_code_size)), \\\n                   
gt_bboxes_3d.new_zeros((num_points,)), \\\n                   attr_labels.new_full(\n                       (num_points,), self.attr_background_label)\n\n        # change orientation to local yaw\n        gt_bboxes_3d[..., 6] = -torch.atan2(\n            gt_bboxes_3d[..., 0], gt_bboxes_3d[..., 2]) + gt_bboxes_3d[..., 6]\n\n        areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (\n            gt_bboxes[:, 3] - gt_bboxes[:, 1])\n        areas = areas[None].repeat(num_points, 1)\n        regress_ranges = regress_ranges[:, None, :].expand(\n            num_points, num_gts, 2)\n        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)\n        centers2d = centers2d[None].expand(num_points, num_gts, 2)\n        gt_bboxes_3d = gt_bboxes_3d[None].expand(num_points, num_gts,\n                                                 self.bbox_code_size)\n        depths = depths[None, :, None].expand(num_points, num_gts, 1)\n        xs, ys = points[:, 0], points[:, 1]\n        xs = xs[:, None].expand(num_points, num_gts)\n        ys = ys[:, None].expand(num_points, num_gts)\n\n        delta_xs = (xs - centers2d[..., 0])[..., None]\n        delta_ys = (ys - centers2d[..., 1])[..., None]\n        bbox_targets_3d = torch.cat(\n            (delta_xs, delta_ys, depths, gt_bboxes_3d[..., 3:]), dim=-1)\n\n        left = xs - gt_bboxes[..., 0]\n        right = gt_bboxes[..., 2] - xs\n        top = ys - gt_bboxes[..., 1]\n        bottom = gt_bboxes[..., 3] - ys\n        bbox_targets = torch.stack((left, top, right, bottom), -1)\n\n        assert self.center_sampling is True, 'Setting center_sampling to '\\\n            'False has not been implemented for FCOS3D.'\n        # condition1: inside a `center bbox`\n        radius = self.center_sample_radius\n        center_xs = centers2d[..., 0]\n        center_ys = centers2d[..., 1]\n        center_gts = torch.zeros_like(gt_bboxes)\n        stride = center_xs.new_zeros(center_xs.shape)\n\n        # project the points on current lvl back to the `original` sizes\n        lvl_begin = 0\n        for lvl_idx, num_points_lvl in enumerate(num_points_per_lvl):\n            lvl_end = lvl_begin + num_points_lvl\n            stride[lvl_begin:lvl_end] = self.strides[lvl_idx] * radius\n            lvl_begin = lvl_end\n\n        center_gts[..., 0] = center_xs - stride\n        center_gts[..., 1] = center_ys - stride\n        center_gts[..., 2] = center_xs + stride\n        center_gts[..., 3] = center_ys + stride\n\n        cb_dist_left = xs - center_gts[..., 0]\n        cb_dist_right = center_gts[..., 2] - xs\n        cb_dist_top = ys - center_gts[..., 1]\n        cb_dist_bottom = center_gts[..., 3] - ys\n        center_bbox = torch.stack(\n            (cb_dist_left, cb_dist_top, cb_dist_right, cb_dist_bottom), -1)\n        inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0\n\n        # condition2: limit the regression range for each location\n        max_regress_distance = bbox_targets.max(-1)[0]\n        inside_regress_range = (\n            (max_regress_distance >= regress_ranges[..., 0])\n            & (max_regress_distance <= regress_ranges[..., 1]))\n\n        # center-based criterion to deal with ambiguity\n        dists = torch.sqrt(torch.sum(bbox_targets_3d[..., :2]**2, dim=-1))\n        dists[inside_gt_bbox_mask == 0] = INF\n        dists[inside_regress_range == 0] = INF\n        min_dist, min_dist_inds = dists.min(dim=1)\n\n        labels = gt_labels[min_dist_inds]\n        labels_3d = gt_labels_3d[min_dist_inds]\n        attr_labels = attr_labels[min_dist_inds]\n      
  labels[min_dist == INF] = self.background_label  # set as BG\n        labels_3d[min_dist == INF] = self.background_label  # set as BG\n        attr_labels[min_dist == INF] = self.attr_background_label\n\n        bbox_targets = bbox_targets[range(num_points), min_dist_inds]\n        bbox_targets_3d = bbox_targets_3d[range(num_points), min_dist_inds]\n        relative_dists = torch.sqrt(\n            torch.sum(bbox_targets_3d[..., :2]**2,\n                      dim=-1)) / (1.414 * stride[:, 0])\n        # [N, 1] / [N, 1]\n        centerness_targets = torch.exp(-self.centerness_alpha * relative_dists)\n\n        return labels, bbox_targets, labels_3d, bbox_targets_3d, \\\n            centerness_targets, attr_labels\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/free_anchor3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox import bbox_overlaps_nearest_3d\nfrom ..builder import HEADS\nfrom .anchor3d_head import Anchor3DHead\nfrom .train_mixins import get_direction_target\n\n\n@HEADS.register_module()\nclass FreeAnchor3DHead(Anchor3DHead):\n    r\"\"\"`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.\n\n    Note:\n        This implementation is directly modified from the `mmdet implementation\n        <https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/dense_heads/free_anchor_retina_head.py>`_.\n        We find it also works on 3D detection with minor modification, i.e.,\n        different hyper-parameters and a additional direction classifier.\n\n    Args:\n        pre_anchor_topk (int): Number of boxes that be token in each bag.\n        bbox_thr (float): The threshold of the saturated linear function. It is\n            usually the same with the IoU threshold used in NMS.\n        gamma (float): Gamma parameter in focal loss.\n        alpha (float): Alpha parameter in focal loss.\n        kwargs (dict): Other arguments are the same as those in :class:`Anchor3DHead`.\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 pre_anchor_topk=50,\n                 bbox_thr=0.6,\n                 gamma=2.0,\n                 alpha=0.5,\n                 init_cfg=None,\n                 **kwargs):\n        super().__init__(init_cfg=init_cfg, **kwargs)\n        self.pre_anchor_topk = pre_anchor_topk\n        self.bbox_thr = bbox_thr\n        self.gamma = gamma\n        self.alpha = alpha\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate loss of FreeAnchor head.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Classification scores of\n                different samples.\n            bbox_preds (list[torch.Tensor]): Box predictions of\n                different samples\n            dir_cls_preds (list[torch.Tensor]): Direction predictions of\n                different samples\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes.\n            gt_labels (list[torch.Tensor]): Ground truth labels.\n            input_metas (list[dict]): List of input meta information.\n            gt_bboxes_ignore (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth boxes that should be ignored. 
Defaults to None.\n\n        Returns:\n            dict[str, torch.Tensor]: Loss items.\n\n                - positive_bag_loss (torch.Tensor): Loss of positive samples.\n                - negative_bag_loss (torch.Tensor): Loss of negative samples.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.anchor_generator.num_levels\n\n        anchor_list = self.get_anchors(featmap_sizes, input_metas)\n        anchors = [torch.cat(anchor) for anchor in anchor_list]\n\n        # concatenate each level\n        cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(\n                cls_score.size(0), -1, self.num_classes)\n            for cls_score in cls_scores\n        ]\n        bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(\n                bbox_pred.size(0), -1, self.box_code_size)\n            for bbox_pred in bbox_preds\n        ]\n        dir_cls_preds = [\n            dir_cls_pred.permute(0, 2, 3,\n                                 1).reshape(dir_cls_pred.size(0), -1, 2)\n            for dir_cls_pred in dir_cls_preds\n        ]\n\n        cls_scores = torch.cat(cls_scores, dim=1)\n        bbox_preds = torch.cat(bbox_preds, dim=1)\n        dir_cls_preds = torch.cat(dir_cls_preds, dim=1)\n\n        cls_prob = torch.sigmoid(cls_scores)\n        box_prob = []\n        num_pos = 0\n        positive_losses = []\n        for _, (anchors_, gt_labels_, gt_bboxes_, cls_prob_, bbox_preds_,\n                dir_cls_preds_) in enumerate(\n                    zip(anchors, gt_labels, gt_bboxes, cls_prob, bbox_preds,\n                        dir_cls_preds)):\n\n            gt_bboxes_ = gt_bboxes_.tensor.to(anchors_.device)\n\n            with torch.no_grad():\n                # box_localization: a_{j}^{loc}, shape: [j, 4]\n                pred_boxes = self.bbox_coder.decode(anchors_, bbox_preds_)\n\n                # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]\n                object_box_iou = bbox_overlaps_nearest_3d(\n                    gt_bboxes_, pred_boxes)\n\n                # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]\n                t1 = self.bbox_thr\n                t2 = object_box_iou.max(\n                    dim=1, keepdim=True).values.clamp(min=t1 + 1e-6)\n                object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(\n                    min=0, max=1)\n\n                # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]\n                num_obj = gt_labels_.size(0)\n                indices = torch.stack(\n                    [torch.arange(num_obj).type_as(gt_labels_), gt_labels_],\n                    dim=0)\n\n                object_cls_box_prob = torch.sparse_coo_tensor(\n                    indices, object_box_prob)\n\n                # image_box_iou: P{a_{j} \\in A_{+}}, shape: [c, j]\n                \"\"\"\n                from \"start\" to \"end\" implement:\n                image_box_iou = torch.sparse.max(object_cls_box_prob,\n                                                 dim=0).t()\n\n                \"\"\"\n                # start\n                box_cls_prob = torch.sparse.sum(\n                    object_cls_box_prob, dim=0).to_dense()\n\n                indices = torch.nonzero(box_cls_prob, as_tuple=False).t_()\n                if indices.numel() == 0:\n                    image_box_prob = torch.zeros(\n                        anchors_.size(0),\n                        self.num_classes).type_as(object_box_prob)\n                
else:\n                    nonzero_box_prob = torch.where(\n                        (gt_labels_.unsqueeze(dim=-1) == indices[0]),\n                        object_box_prob[:, indices[1]],\n                        torch.tensor(\n                            [0]).type_as(object_box_prob)).max(dim=0).values\n\n                    # upmap to shape [j, c]\n                    image_box_prob = torch.sparse_coo_tensor(\n                        indices.flip([0]),\n                        nonzero_box_prob,\n                        size=(anchors_.size(0), self.num_classes)).to_dense()\n                # end\n\n                box_prob.append(image_box_prob)\n\n            # construct bags for objects\n            match_quality_matrix = bbox_overlaps_nearest_3d(\n                gt_bboxes_, anchors_)\n            _, matched = torch.topk(\n                match_quality_matrix,\n                self.pre_anchor_topk,\n                dim=1,\n                sorted=False)\n            del match_quality_matrix\n\n            # matched_cls_prob: P_{ij}^{cls}\n            matched_cls_prob = torch.gather(\n                cls_prob_[matched], 2,\n                gt_labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,\n                                                 1)).squeeze(2)\n\n            # matched_box_prob: P_{ij}^{loc}\n            matched_anchors = anchors_[matched]\n            matched_object_targets = self.bbox_coder.encode(\n                matched_anchors,\n                gt_bboxes_.unsqueeze(dim=1).expand_as(matched_anchors))\n\n            # direction classification loss\n            loss_dir = None\n            if self.use_direction_classifier:\n                # also calculate direction prob: P_{ij}^{dir}\n                matched_dir_targets = get_direction_target(\n                    matched_anchors,\n                    matched_object_targets,\n                    self.dir_offset,\n                    self.dir_limit_offset,\n                    one_hot=False)\n                loss_dir = self.loss_dir(\n                    dir_cls_preds_[matched].transpose(-2, -1),\n                    matched_dir_targets,\n                    reduction_override='none')\n\n            # generate bbox weights\n            if self.diff_rad_by_sin:\n                bbox_preds_[matched], matched_object_targets = \\\n                    self.add_sin_difference(\n                        bbox_preds_[matched], matched_object_targets)\n            bbox_weights = matched_anchors.new_ones(matched_anchors.size())\n            # Use pop is not right, check performance\n            code_weight = self.train_cfg.get('code_weight', None)\n            if code_weight:\n                bbox_weights = bbox_weights * bbox_weights.new_tensor(\n                    code_weight)\n            loss_bbox = self.loss_bbox(\n                bbox_preds_[matched],\n                matched_object_targets,\n                bbox_weights,\n                reduction_override='none').sum(-1)\n\n            if loss_dir is not None:\n                loss_bbox += loss_dir\n            matched_box_prob = torch.exp(-loss_bbox)\n\n            # positive_losses: {-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )}\n            num_pos += len(gt_bboxes_)\n            positive_losses.append(\n                self.positive_bag_loss(matched_cls_prob, matched_box_prob))\n\n        positive_loss = torch.cat(positive_losses).sum() / max(1, num_pos)\n\n        # box_prob: P{a_{j} \\in A_{+}}\n        box_prob = torch.stack(box_prob, dim=0)\n\n        # 
negative_loss:\n        # \\sum_{j}{ FL((1 - P{a_{j} \\in A_{+}}) * (1 - P_{j}^{bg})) } / n||B||\n        negative_loss = self.negative_bag_loss(cls_prob, box_prob).sum() / max(\n            1, num_pos * self.pre_anchor_topk)\n\n        losses = {\n            'positive_bag_loss': positive_loss,\n            'negative_bag_loss': negative_loss\n        }\n        return losses\n\n    def positive_bag_loss(self, matched_cls_prob, matched_box_prob):\n        \"\"\"Generate positive bag loss.\n\n        Args:\n            matched_cls_prob (torch.Tensor): Classification probability\n                of matched positive samples.\n            matched_box_prob (torch.Tensor): Bounding box probability\n                of matched positive samples.\n\n        Returns:\n            torch.Tensor: Loss of positive samples.\n        \"\"\"\n        # bag_prob = Mean-max(matched_prob)\n        matched_prob = matched_cls_prob * matched_box_prob\n        weight = 1 / torch.clamp(1 - matched_prob, 1e-12, None)\n        weight /= weight.sum(dim=1).unsqueeze(dim=-1)\n        bag_prob = (weight * matched_prob).sum(dim=1)\n        # positive_bag_loss = -self.alpha * log(bag_prob)\n        bag_prob = bag_prob.clamp(0, 1)  # to avoid bug of BCE, check\n        return self.alpha * F.binary_cross_entropy(\n            bag_prob, torch.ones_like(bag_prob), reduction='none')\n\n    def negative_bag_loss(self, cls_prob, box_prob):\n        \"\"\"Generate negative bag loss.\n\n        Args:\n            cls_prob (torch.Tensor): Classification probability\n                of negative samples.\n            box_prob (torch.Tensor): Bounding box probability\n                of negative samples.\n\n        Returns:\n            torch.Tensor: Loss of negative samples.\n        \"\"\"\n        prob = cls_prob * (1 - box_prob)\n        prob = prob.clamp(0, 1)  # to avoid bug of BCE, check\n        negative_bag_loss = prob**self.gamma * F.binary_cross_entropy(\n            prob, torch.zeros_like(prob), reduction='none')\n        return (1 - self.alpha) * negative_bag_loss\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/groupfree3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport numpy as np\nimport torch\nfrom mmcv import ConfigDict\nfrom mmcv.cnn import ConvModule, xavier_init\nfrom mmcv.cnn.bricks.transformer import (build_positional_encoding,\n                                         build_transformer_layer)\nfrom mmcv.ops import PointsSampler as Points_Sampler\nfrom mmcv.ops import gather_points\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.post_processing import aligned_3d_nms\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom ..builder import HEADS, build_loss\nfrom .base_conv_bbox_head import BaseConvBboxHead\n\nEPS = 1e-6\n\n\nclass PointsObjClsModule(BaseModule):\n    \"\"\"object candidate point prediction from seed point features.\n\n    Args:\n        in_channel (int): number of channels of seed point features.\n        num_convs (int, optional): number of conv layers.\n            Default: 3.\n        conv_cfg (dict, optional): Config of convolution.\n            Default: dict(type='Conv1d').\n        norm_cfg (dict, optional): Config of normalization.\n            Default: dict(type='BN1d').\n        act_cfg (dict, optional): Config of activation.\n            Default: dict(type='ReLU').\n    \"\"\"\n\n    def __init__(self,\n                 in_channel,\n                 num_convs=3,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        conv_channels = [in_channel for _ in range(num_convs - 1)]\n        conv_channels.append(1)\n\n        self.mlp = nn.Sequential()\n        prev_channels = in_channel\n        for i in range(num_convs):\n            self.mlp.add_module(\n                f'layer{i}',\n                ConvModule(\n                    prev_channels,\n                    conv_channels[i],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg if i < num_convs - 1 else None,\n                    act_cfg=act_cfg if i < num_convs - 1 else None,\n                    bias=True,\n                    inplace=True))\n            prev_channels = conv_channels[i]\n\n    def forward(self, seed_features):\n        \"\"\"Forward pass.\n\n        Args:\n            seed_features (torch.Tensor): seed features, dims:\n                (batch_size, feature_dim, num_seed)\n\n        Returns:\n            torch.Tensor: objectness logits, dim:\n                (batch_size, 1, num_seed)\n        \"\"\"\n        return self.mlp(seed_features)\n\n\nclass GeneralSamplingModule(nn.Module):\n    \"\"\"Sampling Points.\n\n    Sampling points with given index.\n    \"\"\"\n\n    def forward(self, xyz, features, sample_inds):\n        \"\"\"Forward pass.\n\n        Args:\n            xyz： (B, N, 3) the coordinates of the features.\n            features (Tensor): (B, C, N) features to sample.\n            sample_inds (Tensor): (B, M) the given index,\n                where M is the number of points.\n\n        Returns:\n            Tensor: (B, M, 3) coordinates of sampled features\n            Tensor: (B, C, M) the sampled features.\n            Tensor: (B, M) the given index.\n        \"\"\"\n        xyz_t = xyz.transpose(1, 2).contiguous()\n        new_xyz = gather_points(xyz_t, sample_inds).transpose(1,\n                                 
                             2).contiguous()\n        new_features = gather_points(features, sample_inds).contiguous()\n\n        return new_xyz, new_features, sample_inds\n\n\n@HEADS.register_module()\nclass GroupFree3DHead(BaseModule):\n    r\"\"\"Bbox head of `Group-Free 3D <https://arxiv.org/abs/2104.00678>`_.\n\n    Args:\n        num_classes (int): The number of class.\n        in_channels (int): The dims of input features from backbone.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        num_decoder_layers (int): The number of transformer decoder layers.\n        transformerlayers (dict): Config for transformer decoder.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        num_proposal (int): The number of initial sampling candidates.\n        pred_layer_cfg (dict): Config of classfication and regression\n            prediction layers.\n        size_cls_agnostic (bool): Whether the predicted size is class-agnostic.\n        gt_per_seed (int): the number of candidate instance each point belongs\n            to.\n        sampling_objectness_loss (dict): Config of initial sampling\n            objectness loss.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_class_loss (dict): Config of size classification loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        size_reg_loss (dict): Config of class-agnostic size regression loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 bbox_coder,\n                 num_decoder_layers,\n                 transformerlayers,\n                 decoder_self_posembeds=dict(\n                     type='ConvBNPositionalEncoding',\n                     input_channel=6,\n                     num_pos_feats=288),\n                 decoder_cross_posembeds=dict(\n                     type='ConvBNPositionalEncoding',\n                     input_channel=3,\n                     num_pos_feats=288),\n                 train_cfg=None,\n                 test_cfg=None,\n                 num_proposal=128,\n                 pred_layer_cfg=None,\n                 size_cls_agnostic=True,\n                 gt_per_seed=3,\n                 sampling_objectness_loss=None,\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_class_loss=None,\n                 size_res_loss=None,\n                 size_reg_loss=None,\n                 semantic_loss=None,\n                 init_cfg=None):\n        super(GroupFree3DHead, self).__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.num_proposal = num_proposal\n        self.in_channels = in_channels\n        self.num_decoder_layers = num_decoder_layers\n        self.size_cls_agnostic = size_cls_agnostic\n        self.gt_per_seed = gt_per_seed\n\n        # Transformer decoder layers\n        if isinstance(transformerlayers, ConfigDict):\n            transformerlayers = [\n                
copy.deepcopy(transformerlayers)\n                for _ in range(num_decoder_layers)\n            ]\n        else:\n            assert isinstance(transformerlayers, list) and \\\n                   len(transformerlayers) == num_decoder_layers\n        self.decoder_layers = nn.ModuleList()\n        for i in range(self.num_decoder_layers):\n            self.decoder_layers.append(\n                build_transformer_layer(transformerlayers[i]))\n        self.embed_dims = self.decoder_layers[0].embed_dims\n        assert self.embed_dims == decoder_self_posembeds['num_pos_feats']\n        assert self.embed_dims == decoder_cross_posembeds['num_pos_feats']\n\n        # bbox_coder\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_sizes = self.bbox_coder.num_sizes\n        self.num_dir_bins = self.bbox_coder.num_dir_bins\n\n        # Initial object candidate sampling\n        self.gsample_module = GeneralSamplingModule()\n        self.fps_module = Points_Sampler([self.num_proposal])\n        self.points_obj_cls = PointsObjClsModule(self.in_channels)\n\n        self.fp16_enabled = False\n\n        # initial candidate prediction\n        self.conv_pred = BaseConvBboxHead(\n            **pred_layer_cfg,\n            num_cls_out_channels=self._get_cls_out_channels(),\n            num_reg_out_channels=self._get_reg_out_channels())\n\n        # query proj and key proj\n        self.decoder_query_proj = nn.Conv1d(\n            self.embed_dims, self.embed_dims, kernel_size=1)\n        self.decoder_key_proj = nn.Conv1d(\n            self.embed_dims, self.embed_dims, kernel_size=1)\n\n        # query position embed\n        self.decoder_self_posembeds = nn.ModuleList()\n        for _ in range(self.num_decoder_layers):\n            self.decoder_self_posembeds.append(\n                build_positional_encoding(decoder_self_posembeds))\n        # key position embed\n        self.decoder_cross_posembeds = nn.ModuleList()\n        for _ in range(self.num_decoder_layers):\n            self.decoder_cross_posembeds.append(\n                build_positional_encoding(decoder_cross_posembeds))\n\n        # Prediction Head\n        self.prediction_heads = nn.ModuleList()\n        for i in range(self.num_decoder_layers):\n            self.prediction_heads.append(\n                BaseConvBboxHead(\n                    **pred_layer_cfg,\n                    num_cls_out_channels=self._get_cls_out_channels(),\n                    num_reg_out_channels=self._get_reg_out_channels()))\n\n        self.sampling_objectness_loss = build_loss(sampling_objectness_loss)\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.dir_res_loss = build_loss(dir_res_loss)\n        self.dir_class_loss = build_loss(dir_class_loss)\n        self.semantic_loss = build_loss(semantic_loss)\n        if self.size_cls_agnostic:\n            self.size_reg_loss = build_loss(size_reg_loss)\n        else:\n            self.size_res_loss = build_loss(size_res_loss)\n            self.size_class_loss = build_loss(size_class_loss)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of transformer decoder in GroupFree3DHead.\"\"\"\n        # initialize transformer\n        for m in self.decoder_layers.parameters():\n            if m.dim() > 1:\n                xavier_init(m, distribution='uniform')\n        for m in self.decoder_self_posembeds.parameters():\n            if m.dim() > 1:\n                xavier_init(m, distribution='uniform')\n        for m 
in self.decoder_cross_posembeds.parameters():\n            if m.dim() > 1:\n                xavier_init(m, distribution='uniform')\n\n    def _get_cls_out_channels(self):\n        \"\"\"Return the channel number of classification outputs.\"\"\"\n        # Class numbers (k) + objectness (1)\n        return self.num_classes + 1\n\n    def _get_reg_out_channels(self):\n        \"\"\"Return the channel number of regression outputs.\"\"\"\n        # center residual (3),\n        # heading class+residual (num_dir_bins*2),\n        # size class+residual(num_sizes*4 or 3)\n        if self.size_cls_agnostic:\n            return 6 + self.num_dir_bins * 2\n        else:\n            return 3 + self.num_dir_bins * 2 + self.num_sizes * 4\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n            torch.Tensor: Indices of input points.\n        \"\"\"\n\n        seed_points = feat_dict['fp_xyz'][-1]\n        seed_features = feat_dict['fp_features'][-1]\n        seed_indices = feat_dict['fp_indices'][-1]\n\n        return seed_points, seed_features, seed_indices\n\n    def forward(self, feat_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Note:\n            The forward of GroupFree3DHead is divided into 2 steps:\n\n                1. Initial object candidates sampling.\n                2. Iterative object box prediction by transformer decoder.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n            sample_mod (str): sample mode for initial candidates sampling.\n\n        Returns:\n            results (dict): Predictions of GroupFree3D head.\n        \"\"\"\n        assert sample_mod in ['fps', 'kps']\n\n        seed_xyz, seed_features, seed_indices = self._extract_input(feat_dict)\n\n        results = dict(\n            seed_points=seed_xyz,\n            seed_features=seed_features,\n            seed_indices=seed_indices)\n\n        # 1. 
Initial object candidates sampling.\n        if sample_mod == 'fps':\n            sample_inds = self.fps_module(seed_xyz, seed_features)\n        elif sample_mod == 'kps':\n            points_obj_cls_logits = self.points_obj_cls(\n                seed_features)  # (batch_size, 1, num_seed)\n            points_obj_cls_scores = points_obj_cls_logits.sigmoid().squeeze(1)\n            sample_inds = torch.topk(points_obj_cls_scores,\n                                     self.num_proposal)[1].int()\n            results['seeds_obj_cls_logits'] = points_obj_cls_logits\n        else:\n            raise NotImplementedError(\n                f'Sample mode {sample_mod} is not supported!')\n\n        candidate_xyz, candidate_features, sample_inds = self.gsample_module(\n            seed_xyz, seed_features, sample_inds)\n\n        results['query_points_xyz'] = candidate_xyz  # (B, M, 3)\n        results['query_points_feature'] = candidate_features  # (B, C, M)\n        results['query_points_sample_inds'] = sample_inds.long()  # (B, M)\n\n        prefix = 'proposal.'\n        cls_predictions, reg_predictions = self.conv_pred(candidate_features)\n        decode_res = self.bbox_coder.split_pred(cls_predictions,\n                                                reg_predictions, candidate_xyz,\n                                                prefix)\n\n        results.update(decode_res)\n        bbox3d = self.bbox_coder.decode(results, prefix)\n\n        # 2. Iterative object box prediction by transformer decoder.\n        base_bbox3d = bbox3d[:, :, :6].detach().clone()\n\n        query = self.decoder_query_proj(candidate_features).permute(2, 0, 1)\n        key = self.decoder_key_proj(seed_features).permute(2, 0, 1)\n        value = key\n\n        # transformer decoder\n        results['num_decoder_layers'] = 0\n        for i in range(self.num_decoder_layers):\n            prefix = f's{i}.'\n\n            query_pos = self.decoder_self_posembeds[i](base_bbox3d).permute(\n                2, 0, 1)\n            key_pos = self.decoder_cross_posembeds[i](seed_xyz).permute(\n                2, 0, 1)\n\n            query = self.decoder_layers[i](\n                query, key, value, query_pos=query_pos,\n                key_pos=key_pos).permute(1, 2, 0)\n\n            results[f'{prefix}query'] = query\n\n            cls_predictions, reg_predictions = self.prediction_heads[i](query)\n            decode_res = self.bbox_coder.split_pred(cls_predictions,\n                                                    reg_predictions,\n                                                    candidate_xyz, prefix)\n            # TODO: should save bbox3d instead of decode_res?\n            results.update(decode_res)\n\n            bbox3d = self.bbox_coder.decode(results, prefix)\n            results[f'{prefix}bbox3d'] = bbox3d\n            base_bbox3d = bbox3d[:, :, :6].detach().clone()\n            query = query.permute(2, 0, 1)\n\n            results['num_decoder_layers'] += 1\n\n        return results\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None,\n             ret_target=False):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of vote head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d 
(list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n            ret_target (Bool): Return targets or not.\n\n        Returns:\n            dict: Losses of GroupFree3D.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n        (sampling_targets, sampling_weights, assigned_size_targets,\n         size_class_targets, size_res_targets, dir_class_targets,\n         dir_res_targets, center_targets, assigned_center_targets,\n         mask_targets, valid_gt_masks, objectness_targets, objectness_weights,\n         box_loss_weights, valid_gt_weights) = targets\n\n        batch_size, proposal_num = size_class_targets.shape[:2]\n\n        losses = dict()\n\n        # calculate objectness classification loss\n        sampling_obj_score = bbox_preds['seeds_obj_cls_logits'].reshape(-1, 1)\n        sampling_objectness_loss = self.sampling_objectness_loss(\n            sampling_obj_score,\n            1 - sampling_targets.reshape(-1),\n            sampling_weights.reshape(-1),\n            avg_factor=batch_size)\n        losses['sampling_objectness_loss'] = sampling_objectness_loss\n\n        prefixes = ['proposal.'] + [\n            f's{i}.' for i in range(bbox_preds['num_decoder_layers'])\n        ]\n        num_stages = len(prefixes)\n        for prefix in prefixes:\n\n            # calculate objectness loss\n            obj_score = bbox_preds[f'{prefix}obj_scores'].transpose(2, 1)\n            objectness_loss = self.objectness_loss(\n                obj_score.reshape(-1, 1),\n                1 - objectness_targets.reshape(-1),\n                objectness_weights.reshape(-1),\n                avg_factor=batch_size)\n            losses[f'{prefix}objectness_loss'] = objectness_loss / num_stages\n\n            # calculate center loss\n            box_loss_weights_expand = box_loss_weights.unsqueeze(-1).expand(\n                -1, -1, 3)\n            center_loss = self.center_loss(\n                bbox_preds[f'{prefix}center'],\n                assigned_center_targets,\n                weight=box_loss_weights_expand)\n            losses[f'{prefix}center_loss'] = center_loss / num_stages\n\n            # calculate direction class loss\n            dir_class_loss = self.dir_class_loss(\n                bbox_preds[f'{prefix}dir_class'].transpose(2, 1),\n                dir_class_targets,\n                weight=box_loss_weights)\n            losses[f'{prefix}dir_class_loss'] = dir_class_loss / num_stages\n\n            # calculate direction residual loss\n            heading_label_one_hot = size_class_targets.new_zeros(\n                (batch_size, proposal_num, self.num_dir_bins))\n            heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1),\n                                           1)\n            dir_res_norm = torch.sum(\n                bbox_preds[f'{prefix}dir_res_norm'] * heading_label_one_hot,\n                -1)\n            dir_res_loss = self.dir_res_loss(\n      
          dir_res_norm, dir_res_targets, weight=box_loss_weights)\n            losses[f'{prefix}dir_res_loss'] = dir_res_loss / num_stages\n\n            if self.size_cls_agnostic:\n                # calculate class-agnostic size loss\n                size_reg_loss = self.size_reg_loss(\n                    bbox_preds[f'{prefix}size'],\n                    assigned_size_targets,\n                    weight=box_loss_weights_expand)\n                losses[f'{prefix}size_reg_loss'] = size_reg_loss / num_stages\n\n            else:\n                # calculate size class loss\n                size_class_loss = self.size_class_loss(\n                    bbox_preds[f'{prefix}size_class'].transpose(2, 1),\n                    size_class_targets,\n                    weight=box_loss_weights)\n                losses[\n                    f'{prefix}size_class_loss'] = size_class_loss / num_stages\n\n                # calculate size residual loss\n                one_hot_size_targets = size_class_targets.new_zeros(\n                    (batch_size, proposal_num, self.num_sizes))\n                one_hot_size_targets.scatter_(2,\n                                              size_class_targets.unsqueeze(-1),\n                                              1)\n                one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(\n                    -1).expand(-1, -1, -1, 3).contiguous()\n                size_residual_norm = torch.sum(\n                    bbox_preds[f'{prefix}size_res_norm'] *\n                    one_hot_size_targets_expand, 2)\n                box_loss_weights_expand = box_loss_weights.unsqueeze(\n                    -1).expand(-1, -1, 3)\n                size_res_loss = self.size_res_loss(\n                    size_residual_norm,\n                    size_res_targets,\n                    weight=box_loss_weights_expand)\n                losses[f'{prefix}size_res_loss'] = size_res_loss / num_stages\n\n            # calculate semantic loss\n            semantic_loss = self.semantic_loss(\n                bbox_preds[f'{prefix}sem_scores'].transpose(2, 1),\n                mask_targets,\n                weight=box_loss_weights)\n            losses[f'{prefix}semantic_loss'] = semantic_loss / num_stages\n\n        if ret_target:\n            losses['targets'] = targets\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None,\n                    max_gt_num=64):\n        \"\"\"Generate targets of GroupFree3D head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of vote head.\n            max_gt_num (int): Max number of GTs for single batch.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of GroupFree3D head.\n        \"\"\"\n        # find empty example\n        valid_gt_masks = list()\n        gt_num = list()\n        for index in 
range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n                valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))\n                gt_num.append(1)\n            else:\n                valid_gt_masks.append(gt_labels_3d[index].new_ones(\n                    gt_labels_3d[index].shape))\n                gt_num.append(gt_labels_3d[index].shape[0])\n        # max_gt_num = max(gt_num)\n\n        max_gt_nums = [max_gt_num for _ in range(len(gt_labels_3d))]\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        seed_points = [\n            bbox_preds['seed_points'][i] for i in range(len(gt_labels_3d))\n        ]\n\n        seed_indices = [\n            bbox_preds['seed_indices'][i] for i in range(len(gt_labels_3d))\n        ]\n\n        candidate_indices = [\n            bbox_preds['query_points_sample_inds'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        (sampling_targets, assigned_size_targets, size_class_targets,\n         size_res_targets, dir_class_targets, dir_res_targets, center_targets,\n         assigned_center_targets, mask_targets, objectness_targets,\n         objectness_masks) = multi_apply(self.get_targets_single, points,\n                                         gt_bboxes_3d, gt_labels_3d,\n                                         pts_semantic_mask, pts_instance_mask,\n                                         max_gt_nums, seed_points,\n                                         seed_indices, candidate_indices)\n\n        # pad targets as original code of GroupFree3D.\n        for index in range(len(gt_labels_3d)):\n            pad_num = max_gt_num - gt_labels_3d[index].shape[0]\n            valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))\n\n        sampling_targets = torch.stack(sampling_targets)\n        sampling_weights = (sampling_targets >= 0).float()\n        sampling_normalizer = sampling_weights.sum(dim=1, keepdim=True).float()\n        sampling_weights /= sampling_normalizer.clamp(min=1.0)\n\n        assigned_size_targets = torch.stack(assigned_size_targets)\n        center_targets = torch.stack(center_targets)\n        valid_gt_masks = torch.stack(valid_gt_masks)\n\n        assigned_center_targets = torch.stack(assigned_center_targets)\n        objectness_targets = torch.stack(objectness_targets)\n\n        objectness_weights = torch.stack(objectness_masks)\n        cls_normalizer = objectness_weights.sum(dim=1, keepdim=True).float()\n        objectness_weights /= cls_normalizer.clamp(min=1.0)\n\n        box_loss_weights = objectness_targets.float() / (\n            objectness_targets.sum().float() + EPS)\n\n        valid_gt_weights = valid_gt_masks.float() / (\n            valid_gt_masks.sum().float() + EPS)\n\n        dir_class_targets = torch.stack(dir_class_targets)\n        dir_res_targets = torch.stack(dir_res_targets)\n        size_class_targets = torch.stack(size_class_targets)\n        size_res_targets = torch.stack(size_res_targets)\n        mask_targets = torch.stack(mask_targets)\n\n        return (sampling_targets, sampling_weights, assigned_size_targets,\n                
size_class_targets, size_res_targets, dir_class_targets,\n                dir_res_targets, center_targets, assigned_center_targets,\n                mask_targets, valid_gt_masks, objectness_targets,\n                objectness_weights, box_loss_weights, valid_gt_weights)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           max_gt_nums=None,\n                           seed_points=None,\n                           seed_indices=None,\n                           candidate_indices=None,\n                           seed_points_obj_topk=4):\n        \"\"\"Generate targets of GroupFree3D head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (torch.Tensor): Point-wise instance\n                label of each batch.\n            max_gt_nums (int): Max number of GTs for single batch.\n            seed_points (torch.Tensor): Coordinates of seed points.\n            seed_indices (torch.Tensor): Indices of seed points.\n            candidate_indices (torch.Tensor): Indices of object candidates.\n            seed_points_obj_topk (int): k value of k-Closest Points Sampling.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of GroupFree3D head.\n        \"\"\"\n\n        assert self.bbox_coder.with_rot or pts_semantic_mask is not None\n\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n\n        # generate center, dir, size target\n        (center_targets, size_targets, size_class_targets, size_res_targets,\n         dir_class_targets,\n         dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)\n\n        # pad targets as original code of GroupFree3D\n        pad_num = max_gt_nums - gt_labels_3d.shape[0]\n        box_label_mask = points.new_zeros([max_gt_nums])\n        box_label_mask[:gt_labels_3d.shape[0]] = 1\n\n        gt_bboxes_pad = F.pad(gt_bboxes_3d.tensor, (0, 0, 0, pad_num))\n        gt_bboxes_pad[gt_labels_3d.shape[0]:, 0:3] += 1000\n        gt_bboxes_3d = gt_bboxes_3d.new_box(gt_bboxes_pad)\n\n        gt_labels_3d = F.pad(gt_labels_3d, (0, pad_num))\n\n        center_targets = F.pad(center_targets, (0, 0, 0, pad_num), value=1000)\n        size_targets = F.pad(size_targets, (0, 0, 0, pad_num))\n        size_class_targets = F.pad(size_class_targets, (0, pad_num))\n        size_res_targets = F.pad(size_res_targets, (0, 0, 0, pad_num))\n        dir_class_targets = F.pad(dir_class_targets, (0, pad_num))\n        dir_res_targets = F.pad(dir_res_targets, (0, pad_num))\n\n        # 0. 
generate pts_instance_label and pts_obj_mask\n        num_points = points.shape[0]\n        pts_obj_mask = points.new_zeros([num_points], dtype=torch.long)\n        pts_instance_label = points.new_zeros([num_points],\n                                              dtype=torch.long) - 1\n\n        if self.bbox_coder.with_rot:\n            vote_targets = points.new_zeros([num_points, 4 * self.gt_per_seed])\n            vote_target_idx = points.new_zeros([num_points], dtype=torch.long)\n            box_indices_all = gt_bboxes_3d.points_in_boxes_part(points)\n            for i in range(gt_labels_3d.shape[0]):\n                box_indices = box_indices_all[:, i]\n                indices = torch.nonzero(\n                    box_indices, as_tuple=False).squeeze(-1)\n                selected_points = points[indices]\n                pts_obj_mask[indices] = 1\n                vote_targets_tmp = vote_targets[indices]\n                votes = gt_bboxes_3d.gravity_center[i].unsqueeze(\n                    0) - selected_points[:, :3]\n\n                for j in range(self.gt_per_seed):\n                    column_indices = torch.nonzero(\n                        vote_target_idx[indices] == j,\n                        as_tuple=False).squeeze(-1)\n                    vote_targets_tmp[column_indices,\n                                     int(j * 3):int(j * 3 +\n                                                    3)] = votes[column_indices]\n                    vote_targets_tmp[column_indices,\n                                     j + 3 * self.gt_per_seed] = i\n                    if j == 0:\n                        vote_targets_tmp[\n                            column_indices, :3 *\n                            self.gt_per_seed] = votes[column_indices].repeat(\n                                1, self.gt_per_seed)\n                        vote_targets_tmp[column_indices,\n                                         3 * self.gt_per_seed:] = i\n\n                vote_targets[indices] = vote_targets_tmp\n                vote_target_idx[indices] = torch.clamp(\n                    vote_target_idx[indices] + 1, max=2)\n\n            dist = points.new_zeros([num_points, self.gt_per_seed]) + 1000\n            for j in range(self.gt_per_seed):\n                dist[:, j] = (vote_targets[:, 3 * j:3 * j + 3]**2).sum(-1)\n\n            instance_indices = torch.argmin(\n                dist, dim=-1).unsqueeze(-1) + 3 * self.gt_per_seed\n            instance_lable = torch.gather(vote_targets, 1,\n                                          instance_indices).squeeze(-1)\n            pts_instance_label = instance_lable.long()\n            pts_instance_label[pts_obj_mask == 0] = -1\n\n        elif pts_semantic_mask is not None:\n            for i in torch.unique(pts_instance_mask):\n                indices = torch.nonzero(\n                    pts_instance_mask == i, as_tuple=False).squeeze(-1)\n\n                if pts_semantic_mask[indices[0]] < self.num_classes:\n                    selected_points = points[indices, :3]\n                    center = 0.5 * (\n                        selected_points.min(0)[0] + selected_points.max(0)[0])\n\n                    delta_xyz = center - center_targets\n                    instance_lable = torch.argmin((delta_xyz**2).sum(-1))\n                    pts_instance_label[indices] = instance_lable\n                    pts_obj_mask[indices] = 1\n\n        else:\n            raise NotImplementedError\n\n        # 1. 
generate objectness targets in sampling head\n        gt_num = gt_labels_3d.shape[0]\n        num_seed = seed_points.shape[0]\n        num_candidate = candidate_indices.shape[0]\n\n        object_assignment = torch.gather(pts_instance_label, 0, seed_indices)\n        # set background points to the last gt bbox as original code\n        object_assignment[object_assignment < 0] = gt_num - 1\n        object_assignment_one_hot = gt_bboxes_3d.tensor.new_zeros(\n            (num_seed, gt_num))\n        object_assignment_one_hot.scatter_(1, object_assignment.unsqueeze(-1),\n                                           1)  # (num_seed, gt_num)\n\n        delta_xyz = seed_points.unsqueeze(\n            1) - gt_bboxes_3d.gravity_center.unsqueeze(\n                0)  # (num_seed, gt_num, 3)\n        delta_xyz = delta_xyz / (gt_bboxes_3d.dims.unsqueeze(0) + EPS)\n\n        new_dist = torch.sum(delta_xyz**2, dim=-1)\n        euclidean_dist1 = torch.sqrt(new_dist + EPS)\n        euclidean_dist1 = euclidean_dist1 * object_assignment_one_hot + 100 * (\n            1 - object_assignment_one_hot)\n        # (gt_num, num_seed)\n        euclidean_dist1 = euclidean_dist1.permute(1, 0)\n\n        # gt_num x topk\n        topk_inds = torch.topk(\n            euclidean_dist1,\n            seed_points_obj_topk,\n            largest=False)[1] * box_label_mask[:, None] + \\\n            (box_label_mask[:, None] - 1)\n        topk_inds = topk_inds.long()\n        topk_inds = topk_inds.view(-1).contiguous()\n\n        sampling_targets = torch.zeros(\n            num_seed + 1, dtype=torch.long).to(points.device)\n        sampling_targets[topk_inds] = 1\n        sampling_targets = sampling_targets[:num_seed]\n        # pts_instance_label\n        objectness_label_mask = torch.gather(pts_instance_label, 0,\n                                             seed_indices)  # num_seed\n        sampling_targets[objectness_label_mask < 0] = 0\n\n        # 2. objectness target\n        seed_obj_gt = torch.gather(pts_obj_mask, 0, seed_indices)  # num_seed\n        objectness_targets = torch.gather(seed_obj_gt, 0,\n                                          candidate_indices)  # num_candidate\n\n        # 3. 
box target\n        seed_instance_label = torch.gather(pts_instance_label, 0,\n                                           seed_indices)  # num_seed\n        query_points_instance_label = torch.gather(\n            seed_instance_label, 0, candidate_indices)  # num_candidate\n\n        # Set assignment\n        # (num_candidate, ) with values in 0,1,...,gt_num-1\n        assignment = query_points_instance_label\n        # set background points to the last gt bbox as original code\n        assignment[assignment < 0] = gt_num - 1\n        assignment_expand = assignment.unsqueeze(1).expand(-1, 3)\n\n        assigned_center_targets = center_targets[assignment]\n        assigned_size_targets = size_targets[assignment]\n\n        dir_class_targets = dir_class_targets[assignment]\n        dir_res_targets = dir_res_targets[assignment]\n        dir_res_targets /= (np.pi / self.num_dir_bins)\n\n        size_class_targets = size_class_targets[assignment]\n        size_res_targets = \\\n            torch.gather(size_res_targets, 0, assignment_expand)\n        one_hot_size_targets = gt_bboxes_3d.tensor.new_zeros(\n            (num_candidate, self.num_sizes))\n        one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).expand(\n            -1, -1, 3)  # (num_candidate,num_size_cluster,3)\n        mean_sizes = size_res_targets.new_tensor(\n            self.bbox_coder.mean_sizes).unsqueeze(0)\n        pos_mean_sizes = torch.sum(one_hot_size_targets * mean_sizes, 1)\n        size_res_targets /= pos_mean_sizes\n\n        mask_targets = gt_labels_3d[assignment].long()\n\n        objectness_masks = points.new_ones((num_candidate))\n\n        return (sampling_targets, assigned_size_targets, size_class_targets,\n                size_res_targets, dir_class_targets, dir_res_targets,\n                center_targets, assigned_center_targets, mask_targets,\n                objectness_targets, objectness_masks)\n\n    def get_bboxes(self,\n                   points,\n                   bbox_preds,\n                   input_metas,\n                   rescale=False,\n                   use_nms=True):\n        \"\"\"Generate bboxes from GroupFree3D head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from GroupFree3D head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n            use_nms (bool): Whether to apply NMS, skip nms postprocessing\n                while using GroupFree3D head in rpn stage.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # support multi-stage predictions\n        assert self.test_cfg['prediction_stages'] in \\\n            ['last', 'all', 'last_three']\n\n        prefixes = list()\n        if self.test_cfg['prediction_stages'] == 'last':\n            prefixes = [f's{self.num_decoder_layers - 1}.']\n        elif self.test_cfg['prediction_stages'] == 'all':\n            prefixes = ['proposal.'] + \\\n                [f's{i}.' for i in range(self.num_decoder_layers)]\n        elif self.test_cfg['prediction_stages'] == 'last_three':\n            prefixes = [\n                f's{i}.' 
for i in range(self.num_decoder_layers -\n                                        3, self.num_decoder_layers)\n            ]\n        else:\n            raise NotImplementedError\n\n        obj_scores = list()\n        sem_scores = list()\n        bbox3d = list()\n        for prefix in prefixes:\n            # decode boxes\n            obj_score = bbox_preds[f'{prefix}obj_scores'][..., -1].sigmoid()\n            sem_score = bbox_preds[f'{prefix}sem_scores'].softmax(-1)\n            bbox = self.bbox_coder.decode(bbox_preds, prefix)\n            obj_scores.append(obj_score)\n            sem_scores.append(sem_score)\n            bbox3d.append(bbox)\n\n        obj_scores = torch.cat(obj_scores, dim=1)\n        sem_scores = torch.cat(sem_scores, dim=1)\n        bbox3d = torch.cat(bbox3d, dim=1)\n\n        if use_nms:\n            batch_size = bbox3d.shape[0]\n            results = list()\n            for b in range(batch_size):\n                bbox_selected, score_selected, labels = \\\n                    self.multiclass_nms_single(obj_scores[b], sem_scores[b],\n                                               bbox3d[b], points[b, ..., :3],\n                                               input_metas[b])\n                bbox = input_metas[b]['box_type_3d'](\n                    bbox_selected,\n                    box_dim=bbox_selected.shape[-1],\n                    with_yaw=self.bbox_coder.with_rot)\n                results.append((bbox, score_selected, labels))\n\n            return results\n        else:\n            return bbox3d\n\n    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n            points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        bbox = input_meta['box_type_3d'](\n            bbox,\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        box_indices = bbox.points_in_boxes_all(points)\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        nonempty_box_mask = box_indices.T.sum(1) > 5\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],\n                                      obj_scores[nonempty_box_mask],\n                                      bbox_classes[nonempty_box_mask],\n                                      self.test_cfg.nms_thr)\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores > self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_selected], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n    
        for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected] *\n                                      sem_scores[selected][:, k])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/monoflex_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import xavier_init\nfrom torch import nn as nn\n\nfrom mmdet3d.core.utils import get_ellip_gaussian_2D\nfrom mmdet3d.models.model_utils import EdgeFusionModule\nfrom mmdet3d.models.utils import (filter_outside_objs, get_edge_indices,\n                                  get_keypoints, handle_proj_objs)\nfrom mmdet.core import multi_apply\nfrom mmdet.core.bbox.builder import build_bbox_coder\nfrom mmdet.models.utils import gaussian_radius, gen_gaussian_target\nfrom mmdet.models.utils.gaussian_target import (get_local_maximum,\n                                                get_topk_from_heatmap,\n                                                transpose_and_gather_feat)\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_mono3d_head import AnchorFreeMono3DHead\n\n\n@HEADS.register_module()\nclass MonoFlexHead(AnchorFreeMono3DHead):\n    r\"\"\"MonoFlex head used in `MonoFlex <https://arxiv.org/abs/2104.02323>`_\n\n    .. code-block:: none\n\n                / --> 3 x 3 conv --> 1 x 1 conv --> [edge fusion] --> cls\n                |\n                | --> 3 x 3 conv --> 1 x 1 conv --> 2d bbox\n                |\n                | --> 3 x 3 conv --> 1 x 1 conv --> [edge fusion] --> 2d offsets\n                |\n                | --> 3 x 3 conv --> 1 x 1 conv -->  keypoints offsets\n                |\n                | --> 3 x 3 conv --> 1 x 1 conv -->  keypoints uncertainty\n        feature\n                | --> 3 x 3 conv --> 1 x 1 conv -->  keypoints uncertainty\n                |\n                | --> 3 x 3 conv --> 1 x 1 conv -->   3d dimensions\n                |\n                |                  |--- 1 x 1 conv -->  ori cls\n                | --> 3 x 3 conv --|\n                |                  |--- 1 x 1 conv -->  ori offsets\n                |\n                | --> 3 x 3 conv --> 1 x 1 conv -->  depth\n                |\n                \\ --> 3 x 3 conv --> 1 x 1 conv -->  depth uncertainty\n\n    Args:\n        use_edge_fusion (bool): Whether to use edge fusion module while\n            feature extraction.\n        edge_fusion_inds (list[tuple]): Indices of feature to use edge fusion.\n        edge_heatmap_ratio (float): Ratio of generating target heatmap.\n        filter_outside_objs (bool, optional): Whether to filter the\n            outside objects. 
Default: True.\n        loss_cls (dict, optional): Config of classification loss.\n            Default: loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0).\n        loss_bbox (dict, optional): Config of localization loss.\n            Default: loss_bbox=dict(type='IOULoss', loss_weight=10.0).\n        loss_dir (dict, optional): Config of direction classification loss.\n            Default: dict(type='MultibinLoss', loss_weight=0.1).\n        loss_keypoints (dict, optional): Config of keypoints loss.\n            Default: dict(type='L1Loss', loss_weight=0.1).\n        loss_dims: (dict, optional): Config of dimensions loss.\n            Default: dict(type='L1Loss', loss_weight=0.1).\n        loss_offsets2d: (dict, optional): Config of offsets2d loss.\n            Default: dict(type='L1Loss', loss_weight=0.1).\n        loss_direct_depth: (dict, optional): Config of directly regression depth loss.\n            Default: dict(type='L1Loss', loss_weight=0.1).\n        loss_keypoints_depth: (dict, optional): Config of keypoints decoded depth loss.\n            Default: dict(type='L1Loss', loss_weight=0.1).\n        loss_combined_depth: (dict, optional): Config of combined depth loss.\n            Default: dict(type='L1Loss', loss_weight=0.1).\n        loss_attr (dict, optional): Config of attribute classification loss.\n            In MonoFlex, Default: None.\n        bbox_coder (dict, optional): Bbox coder for encoding and decoding boxes.\n            Default: dict(type='MonoFlexCoder', code_size=7).\n        norm_cfg (dict, optional): Dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).\n        init_cfg (dict): Initialization config dict. Default: None.\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 use_edge_fusion,\n                 edge_fusion_inds,\n                 edge_heatmap_ratio,\n                 filter_outside_objs=True,\n                 loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),\n                 loss_bbox=dict(type='IoULoss', loss_weight=0.1),\n                 loss_dir=dict(type='MultiBinLoss', loss_weight=0.1),\n                 loss_keypoints=dict(type='L1Loss', loss_weight=0.1),\n                 loss_dims=dict(type='L1Loss', loss_weight=0.1),\n                 loss_offsets2d=dict(type='L1Loss', loss_weight=0.1),\n                 loss_direct_depth=dict(type='L1Loss', loss_weight=0.1),\n                 loss_keypoints_depth=dict(type='L1Loss', loss_weight=0.1),\n                 loss_combined_depth=dict(type='L1Loss', loss_weight=0.1),\n                 loss_attr=None,\n                 bbox_coder=dict(type='MonoFlexCoder', code_size=7),\n                 norm_cfg=dict(type='BN'),\n                 init_cfg=None,\n                 init_bias=-2.19,\n                 **kwargs):\n        self.use_edge_fusion = use_edge_fusion\n        self.edge_fusion_inds = edge_fusion_inds\n        super().__init__(\n            num_classes,\n            in_channels,\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            loss_dir=loss_dir,\n            loss_attr=loss_attr,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            **kwargs)\n        self.filter_outside_objs = filter_outside_objs\n        self.edge_heatmap_ratio = edge_heatmap_ratio\n        self.init_bias = init_bias\n        self.loss_dir = build_loss(loss_dir)\n        self.loss_keypoints = 
build_loss(loss_keypoints)\n        self.loss_dims = build_loss(loss_dims)\n        self.loss_offsets2d = build_loss(loss_offsets2d)\n        self.loss_direct_depth = build_loss(loss_direct_depth)\n        self.loss_keypoints_depth = build_loss(loss_keypoints_depth)\n        self.loss_combined_depth = build_loss(loss_combined_depth)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n    def _init_edge_module(self):\n        \"\"\"Initialize edge fusion module for feature extraction.\"\"\"\n        self.edge_fuse_cls = EdgeFusionModule(self.num_classes, 256)\n        for i in range(len(self.edge_fusion_inds)):\n            reg_inds, out_inds = self.edge_fusion_inds[i]\n            out_channels = self.group_reg_dims[reg_inds][out_inds]\n            fusion_layer = EdgeFusionModule(out_channels, 256)\n            layer_name = f'edge_fuse_reg_{reg_inds}_{out_inds}'\n            self.add_module(layer_name, fusion_layer)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        super().init_weights()\n        self.conv_cls.bias.data.fill_(self.init_bias)\n        xavier_init(self.conv_regs[4][0], gain=0.01)\n        xavier_init(self.conv_regs[7][0], gain=0.01)\n        for m in self.conv_regs.modules():\n            if isinstance(m, nn.Conv2d):\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def _init_predictor(self):\n        \"\"\"Initialize predictor layers of the head.\"\"\"\n        self.conv_cls_prev = self._init_branch(\n            conv_channels=self.cls_branch,\n            conv_strides=(1, ) * len(self.cls_branch))\n        self.conv_cls = nn.Conv2d(self.cls_branch[-1], self.cls_out_channels,\n                                  1)\n        # init regression head\n        self.conv_reg_prevs = nn.ModuleList()\n        # init output head\n        self.conv_regs = nn.ModuleList()\n        # group_reg_dims:\n        # ((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ), (1, ))\n        for i in range(len(self.group_reg_dims)):\n            reg_dims = self.group_reg_dims[i]\n            reg_branch_channels = self.reg_branch[i]\n            out_channel = self.out_channels[i]\n            reg_list = nn.ModuleList()\n            if len(reg_branch_channels) > 0:\n                self.conv_reg_prevs.append(\n                    self._init_branch(\n                        conv_channels=reg_branch_channels,\n                        conv_strides=(1, ) * len(reg_branch_channels)))\n                for reg_dim in reg_dims:\n                    reg_list.append(nn.Conv2d(out_channel, reg_dim, 1))\n                self.conv_regs.append(reg_list)\n            else:\n                self.conv_reg_prevs.append(None)\n                for reg_dim in reg_dims:\n                    reg_list.append(nn.Conv2d(self.feat_channels, reg_dim, 1))\n                self.conv_regs.append(reg_list)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self._init_predictor()\n        if self.use_edge_fusion:\n            self._init_edge_module()\n\n    def forward_train(self, x, input_metas, gt_bboxes, gt_labels, gt_bboxes_3d,\n                      gt_labels_3d, centers2d, depths, attr_labels,\n                      gt_bboxes_ignore, proposal_cfg, **kwargs):\n        \"\"\"\n        Args:\n            x (list[Tensor]): Features from FPN.\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (list[Tensor]): Ground 
truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each box,\n                shape (num_gts,).\n            gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,\n                shape (num_gts, self.bbox_code_size).\n            gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,\n                shape (num_gts,).\n            centers2d (list[Tensor]): Projected 3D center of each box,\n                shape (num_gts, 2).\n            depths (list[Tensor]): Depth of projected 3D center of each box,\n                shape (num_gts,).\n            attr_labels (list[Tensor]): Attribute labels of each box,\n                shape (num_gts,).\n            gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            proposal_cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used\n        Returns:\n            tuple:\n                losses: (dict[str, Tensor]): A dictionary of loss components.\n                proposal_list (list[Tensor]): Proposals of each image.\n        \"\"\"\n        outs = self(x, input_metas)\n        if gt_labels is None:\n            loss_inputs = outs + (gt_bboxes, gt_bboxes_3d, centers2d, depths,\n                                  attr_labels, input_metas)\n        else:\n            loss_inputs = outs + (gt_bboxes, gt_labels, gt_bboxes_3d,\n                                  gt_labels_3d, centers2d, depths, attr_labels,\n                                  input_metas)\n        losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        if proposal_cfg is None:\n            return losses\n        else:\n            proposal_list = self.get_bboxes(\n                *outs, input_metas, cfg=proposal_cfg)\n            return losses, proposal_list\n\n    def forward(self, feats, input_metas):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (list[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Returns:\n            tuple:\n                cls_scores (list[Tensor]): Box scores for each scale level,\n                    each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * bbox_code_size.\n        \"\"\"\n        mlvl_input_metas = [input_metas for i in range(len(feats))]\n        return multi_apply(self.forward_single, feats, mlvl_input_metas)\n\n    def forward_single(self, x, input_metas):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): Feature maps from a specific FPN feature level.\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Returns:\n            tuple: Scores for each class, bbox predictions.\n        \"\"\"\n        img_h, img_w = input_metas[0]['pad_shape'][:2]\n        batch_size, _, feat_h, feat_w = x.shape\n        downsample_ratio = img_h / feat_h\n\n        for conv_cls_prev_layer in self.conv_cls_prev:\n            cls_feat = conv_cls_prev_layer(x)\n        
out_cls = self.conv_cls(cls_feat)\n\n        if self.use_edge_fusion:\n            # calculate the edge indices for the batch data\n            edge_indices_list = get_edge_indices(\n                input_metas, downsample_ratio, device=x.device)\n            edge_lens = [\n                edge_indices.shape[0] for edge_indices in edge_indices_list\n            ]\n            max_edge_len = max(edge_lens)\n            edge_indices = x.new_zeros((batch_size, max_edge_len, 2),\n                                       dtype=torch.long)\n            for i in range(batch_size):\n                edge_indices[i, :edge_lens[i]] = edge_indices_list[i]\n            # cls feature map edge fusion\n            out_cls = self.edge_fuse_cls(cls_feat, out_cls, edge_indices,\n                                         edge_lens, feat_h, feat_w)\n\n        bbox_pred = []\n\n        for i in range(len(self.group_reg_dims)):\n            reg_feat = x.clone()\n            # feature regression head\n            if len(self.reg_branch[i]) > 0:\n                for conv_reg_prev_layer in self.conv_reg_prevs[i]:\n                    reg_feat = conv_reg_prev_layer(reg_feat)\n\n            for j, conv_reg in enumerate(self.conv_regs[i]):\n                out_reg = conv_reg(reg_feat)\n                #  Use Edge Fusion Module\n                if self.use_edge_fusion and (i, j) in self.edge_fusion_inds:\n                    # reg feature map edge fusion\n                    out_reg = getattr(self, 'edge_fuse_reg_{}_{}'.format(\n                        i, j))(reg_feat, out_reg, edge_indices, edge_lens,\n                               feat_h, feat_w)\n                bbox_pred.append(out_reg)\n\n        bbox_pred = torch.cat(bbox_pred, dim=1)\n        cls_score = out_cls.sigmoid()  # turn to 0-1\n        cls_score = cls_score.clamp(min=1e-4, max=1 - 1e-4)\n\n        return cls_score, bbox_pred\n\n    def get_bboxes(self, cls_scores, bbox_preds, input_metas):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level.\n            bbox_preds (list[Tensor]): Box regression for each scale.\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n        Returns:\n            list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:\n                Each item in result_list is 4-tuple.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == 1\n        cam2imgs = torch.stack([\n            cls_scores[0].new_tensor(input_meta['cam2img'])\n            for input_meta in input_metas\n        ])\n        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(\n            cls_scores[0],\n            bbox_preds[0],\n            input_metas,\n            cam2imgs=cam2imgs,\n            topk=100,\n            kernel=3)\n\n        result_list = []\n        for img_id in range(len(input_metas)):\n\n            bboxes = batch_bboxes[img_id]\n            scores = batch_scores[img_id]\n            labels = batch_topk_labels[img_id]\n\n            keep_idx = scores > 0.25\n            bboxes = bboxes[keep_idx]\n            scores = scores[keep_idx]\n            labels = labels[keep_idx]\n\n            bboxes = input_metas[img_id]['box_type_3d'](\n                bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))\n            attrs = None\n            
result_list.append((bboxes, scores, labels, attrs))\n\n        return result_list\n\n    def decode_heatmap(self,\n                       cls_score,\n                       reg_pred,\n                       input_metas,\n                       cam2imgs,\n                       topk=100,\n                       kernel=3):\n        \"\"\"Transform outputs into detections raw bbox predictions.\n\n        Args:\n            class_score (Tensor): Center predict heatmap,\n                shape (B, num_classes, H, W).\n            reg_pred (Tensor): Box regression map.\n                shape (B, channel, H , W).\n            input_metas (List[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cam2imgs (Tensor): Camera intrinsic matrix.\n                shape (N, 4, 4)\n            topk (int, optional): Get top k center keypoints from heatmap.\n                Default 100.\n            kernel (int, optional): Max pooling kernel for extract local\n                maximum pixels. Default 3.\n\n        Returns:\n            tuple[torch.Tensor]: Decoded output of SMOKEHead, containing\n               the following Tensors:\n              - batch_bboxes (Tensor): Coords of each 3D box.\n                    shape (B, k, 7)\n              - batch_scores (Tensor): Scores of each 3D box.\n                    shape (B, k)\n              - batch_topk_labels (Tensor): Categories of each 3D box.\n                    shape (B, k)\n        \"\"\"\n        img_h, img_w = input_metas[0]['pad_shape'][:2]\n        batch_size, _, feat_h, feat_w = cls_score.shape\n\n        downsample_ratio = img_h / feat_h\n        center_heatmap_pred = get_local_maximum(cls_score, kernel=kernel)\n\n        *batch_dets, topk_ys, topk_xs = get_topk_from_heatmap(\n            center_heatmap_pred, k=topk)\n        batch_scores, batch_index, batch_topk_labels = batch_dets\n\n        regression = transpose_and_gather_feat(reg_pred, batch_index)\n        regression = regression.view(-1, 8)\n\n        pred_base_centers2d = torch.cat(\n            [topk_xs.view(-1, 1),\n             topk_ys.view(-1, 1).float()], dim=1)\n        preds = self.bbox_coder.decode(regression, batch_topk_labels,\n                                       downsample_ratio, cam2imgs)\n        pred_locations = self.bbox_coder.decode_location(\n            pred_base_centers2d, preds['offsets2d'], preds['combined_depth'],\n            cam2imgs, downsample_ratio)\n        pred_yaws = self.bbox_coder.decode_orientation(\n            preds['orientations']).unsqueeze(-1)\n        pred_dims = preds['dimensions']\n        batch_bboxes = torch.cat((pred_locations, pred_dims, pred_yaws), dim=1)\n        batch_bboxes = batch_bboxes.view(batch_size, -1, self.bbox_code_size)\n        return batch_bboxes, batch_scores, batch_topk_labels\n\n    def get_predictions(self, pred_reg, labels3d, centers2d, reg_mask,\n                        batch_indices, input_metas, downsample_ratio):\n        \"\"\"Prepare predictions for computing loss.\n\n        Args:\n            pred_reg (Tensor): Box regression map.\n                shape (B, channel, H , W).\n            labels3d (Tensor): Labels of each 3D box.\n                shape (B * max_objs, )\n            centers2d (Tensor): Coords of each projected 3D box\n                center on image. 
shape (N, 2)\n            reg_mask (Tensor): Mask indicating the existence of the 3D box.\n                shape (B * max_objs, )\n            batch_indices (Tensor): Batch indices of the 3D box.\n                shape (N, )\n            input_metas (list[dict]): Meta information of each image,\n                e.g., image size, scaling factor, etc.\n            downsample_ratio (int): The stride of feature map.\n\n        Returns:\n            dict: The predictions for computing loss.\n        \"\"\"\n        batch, channel = pred_reg.shape[0], pred_reg.shape[1]\n        w = pred_reg.shape[3]\n        cam2imgs = torch.stack([\n            centers2d.new_tensor(input_meta['cam2img'])\n            for input_meta in input_metas\n        ])\n        # (batch_size, 4, 4) -> (N, 4, 4)\n        cam2imgs = cam2imgs[batch_indices, :, :]\n        centers2d_inds = centers2d[:, 1] * w + centers2d[:, 0]\n        centers2d_inds = centers2d_inds.view(batch, -1)\n        pred_regression = transpose_and_gather_feat(pred_reg, centers2d_inds)\n        pred_regression_pois = pred_regression.view(-1, channel)[reg_mask]\n        preds = self.bbox_coder.decode(pred_regression_pois, labels3d,\n                                       downsample_ratio, cam2imgs)\n\n        return preds\n\n    def get_targets(self, gt_bboxes_list, gt_labels_list, gt_bboxes_3d_list,\n                    gt_labels_3d_list, centers2d_list, depths_list, feat_shape,\n                    img_shape, input_metas):\n        \"\"\"Get training targets for batch images.\n\n        Args:\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each\n                image, shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each\n                box, shape (num_gt,).\n            gt_bboxes_3d_list (list[:obj:`CameraInstance3DBoxes`]): 3D\n                Ground truth bboxes of each image,\n                shape (num_gt, bbox_code_size).\n            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of\n                each box, shape (num_gt,).\n            centers2d_list (list[Tensor]): Projected 3D centers onto 2D\n                image, shape (num_gt, 2).\n            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D\n                image, each has shape (num_gt, 1).\n            feat_shape (tuple[int]): Feature map shape with value,\n                shape (B, _, H, W).\n            img_shape (tuple[int]): Image shape in [h, w] format.\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Returns:\n            tuple[Tensor, dict]: The Tensor value is the targets of\n                center heatmap; the dict has components below:\n              - base_centers2d_target (Tensor): Coords of each projected 3D box\n                    center on image. shape (B * max_objs, 2), [dtype: int]\n              - labels3d (Tensor): Labels of each 3D box.\n                    shape (N, )\n              - reg_mask (Tensor): Mask of the existence of the 3D box.\n                    shape (B * max_objs, )\n              - batch_indices (Tensor): Batch id of the 3D box.\n                    shape (N, )\n              - depth_target (Tensor): Depth target of each 3D box.\n                    shape (N, )\n              - keypoints2d_target (Tensor): Keypoints of each projected 3D box\n                    on image. 
shape (N, 10, 2)\n              - keypoints_mask (Tensor): Keypoints mask of each projected 3D\n                    box on image. shape (N, 10)\n              - keypoints_depth_mask (Tensor): Depths decoded from keypoints\n                    of each 3D box. shape (N, 3)\n              - orientations_target (Tensor): Orientation (encoded local yaw)\n                    target of each 3D box. shape (N, )\n              - offsets2d_target (Tensor): Offsets target of each projected\n                    3D box. shape (N, 2)\n              - dimensions_target (Tensor): Dimensions target of each 3D box.\n                    shape (N, 3)\n              - downsample_ratio (int): The stride of feature map.\n        \"\"\"\n\n        img_h, img_w = img_shape[:2]\n        batch_size, _, feat_h, feat_w = feat_shape\n\n        width_ratio = float(feat_w / img_w)  # 1/4\n        height_ratio = float(feat_h / img_h)  # 1/4\n\n        assert width_ratio == height_ratio\n\n        # Whether to filter the objects which are not in FOV.\n        if self.filter_outside_objs:\n            filter_outside_objs(gt_bboxes_list, gt_labels_list,\n                                gt_bboxes_3d_list, gt_labels_3d_list,\n                                centers2d_list, input_metas)\n\n        # transform centers2d to base centers2d for regression and\n        # heatmap generation.\n        # centers2d = int(base_centers2d) + offsets2d\n        base_centers2d_list, offsets2d_list, trunc_mask_list = \\\n            handle_proj_objs(centers2d_list, gt_bboxes_list, input_metas)\n\n        keypoints2d_list, keypoints_mask_list, keypoints_depth_mask_list = \\\n            get_keypoints(gt_bboxes_3d_list, centers2d_list, input_metas)\n\n        center_heatmap_target = gt_bboxes_list[-1].new_zeros(\n            [batch_size, self.num_classes, feat_h, feat_w])\n\n        for batch_id in range(batch_size):\n            # project gt_bboxes from input image to feat map\n            gt_bboxes = gt_bboxes_list[batch_id] * width_ratio\n            gt_labels = gt_labels_list[batch_id]\n\n            # project base centers2d from input image to feat map\n            gt_base_centers2d = base_centers2d_list[batch_id] * width_ratio\n            trunc_masks = trunc_mask_list[batch_id]\n\n            for j, base_center2d in enumerate(gt_base_centers2d):\n                if trunc_masks[j]:\n                    # for outside objects, generate ellipse heatmap\n                    base_center2d_x_int, base_center2d_y_int = \\\n                        base_center2d.int()\n                    scale_box_w = min(base_center2d_x_int - gt_bboxes[j][0],\n                                      gt_bboxes[j][2] - base_center2d_x_int)\n                    scale_box_h = min(base_center2d_y_int - gt_bboxes[j][1],\n                                      gt_bboxes[j][3] - base_center2d_y_int)\n                    radius_x = scale_box_w * self.edge_heatmap_ratio\n                    radius_y = scale_box_h * self.edge_heatmap_ratio\n                    radius_x, radius_y = max(0, int(radius_x)), max(\n                        0, int(radius_y))\n                    assert min(radius_x, radius_y) == 0\n                    ind = gt_labels[j]\n                    get_ellip_gaussian_2D(\n                        center_heatmap_target[batch_id, ind],\n                        [base_center2d_x_int, base_center2d_y_int], radius_x,\n                        radius_y)\n                else:\n                    base_center2d_x_int, base_center2d_y_int = \\\n                        
base_center2d.int()\n                    scale_box_h = (gt_bboxes[j][3] - gt_bboxes[j][1])\n                    scale_box_w = (gt_bboxes[j][2] - gt_bboxes[j][0])\n                    radius = gaussian_radius([scale_box_h, scale_box_w],\n                                             min_overlap=0.7)\n                    radius = max(0, int(radius))\n                    ind = gt_labels[j]\n                    gen_gaussian_target(\n                        center_heatmap_target[batch_id, ind],\n                        [base_center2d_x_int, base_center2d_y_int], radius)\n\n        avg_factor = max(1, center_heatmap_target.eq(1).sum())\n        num_ctrs = [centers2d.shape[0] for centers2d in centers2d_list]\n        max_objs = max(num_ctrs)\n        batch_indices = [\n            centers2d_list[0].new_full((num_ctrs[i], ), i)\n            for i in range(batch_size)\n        ]\n        batch_indices = torch.cat(batch_indices, dim=0)\n        reg_mask = torch.zeros(\n            (batch_size, max_objs),\n            dtype=torch.bool).to(base_centers2d_list[0].device)\n        gt_bboxes_3d = input_metas[0]['box_type_3d'].cat(gt_bboxes_3d_list)\n        gt_bboxes_3d = gt_bboxes_3d.to(base_centers2d_list[0].device)\n\n        # encode original local yaw to multibin format\n        orientations_target = self.bbox_coder.encode(gt_bboxes_3d)\n\n        batch_base_centers2d = base_centers2d_list[0].new_zeros(\n            (batch_size, max_objs, 2))\n\n        for i in range(batch_size):\n            reg_mask[i, :num_ctrs[i]] = 1\n            batch_base_centers2d[i, :num_ctrs[i]] = base_centers2d_list[i]\n\n        flatten_reg_mask = reg_mask.flatten()\n\n        # transform base centers2d from input scale to output scale\n        batch_base_centers2d = batch_base_centers2d.view(-1, 2) * width_ratio\n\n        dimensions_target = gt_bboxes_3d.tensor[:, 3:6]\n        labels_3d = torch.cat(gt_labels_3d_list)\n        keypoints2d_target = torch.cat(keypoints2d_list)\n        keypoints_mask = torch.cat(keypoints_mask_list)\n        keypoints_depth_mask = torch.cat(keypoints_depth_mask_list)\n        offsets2d_target = torch.cat(offsets2d_list)\n        bboxes2d = torch.cat(gt_bboxes_list)\n\n        # transform FCOS style bbox into [x1, y1, x2, y2] format.\n        bboxes2d_target = torch.cat([bboxes2d[:, 0:2] * -1, bboxes2d[:, 2:]],\n                                    dim=-1)\n        depths = torch.cat(depths_list)\n\n        target_labels = dict(\n            base_centers2d_target=batch_base_centers2d.int(),\n            labels3d=labels_3d,\n            reg_mask=flatten_reg_mask,\n            batch_indices=batch_indices,\n            bboxes2d_target=bboxes2d_target,\n            depth_target=depths,\n            keypoints2d_target=keypoints2d_target,\n            keypoints_mask=keypoints_mask,\n            keypoints_depth_mask=keypoints_depth_mask,\n            orientations_target=orientations_target,\n            offsets2d_target=offsets2d_target,\n            dimensions_target=dimensions_target,\n            downsample_ratio=1 / width_ratio)\n\n        return center_heatmap_target, avg_factor, target_labels\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             centers2d,\n             depths,\n             attr_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores 
(list[Tensor]): Box scores for each scale level.\n                shape (num_gt, 4).\n            bbox_preds (list[Tensor]): Box dims is a 4D-tensor, the channel\n                number is bbox_code_size.\n                shape (B, 7, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image.\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box.\n                shape (num_gts, ).\n            gt_bboxes_3d (list[:obj:`CameraInstance3DBoxes`]): 3D boxes ground\n                truth. it is the flipped gt_bboxes\n            gt_labels_3d (list[Tensor]): Same as gt_labels.\n            centers2d (list[Tensor]): 2D centers on the image.\n                shape (num_gts, 2).\n            depths (list[Tensor]): Depth ground truth.\n                shape (num_gts, ).\n            attr_labels (list[Tensor]): Attributes indices of each box.\n                In kitti it's None.\n            input_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n                Default: None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == 1\n        assert attr_labels is None\n        assert gt_bboxes_ignore is None\n        center2d_heatmap = cls_scores[0]\n        pred_reg = bbox_preds[0]\n\n        center2d_heatmap_target, avg_factor, target_labels = \\\n            self.get_targets(gt_bboxes, gt_labels, gt_bboxes_3d,\n                             gt_labels_3d, centers2d, depths,\n                             center2d_heatmap.shape,\n                             input_metas[0]['pad_shape'],\n                             input_metas)\n\n        preds = self.get_predictions(\n            pred_reg=pred_reg,\n            labels3d=target_labels['labels3d'],\n            centers2d=target_labels['base_centers2d_target'],\n            reg_mask=target_labels['reg_mask'],\n            batch_indices=target_labels['batch_indices'],\n            input_metas=input_metas,\n            downsample_ratio=target_labels['downsample_ratio'])\n\n        # heatmap loss\n        loss_cls = self.loss_cls(\n            center2d_heatmap, center2d_heatmap_target, avg_factor=avg_factor)\n\n        # bbox2d regression loss\n        loss_bbox = self.loss_bbox(preds['bboxes2d'],\n                                   target_labels['bboxes2d_target'])\n\n        # keypoints loss, the keypoints in predictions and target are all\n        # local coordinates. 
The mask dtype should be bool, not int\n        # or float, so that indexing uses a boolean mask\n        keypoints2d_mask = target_labels['keypoints_mask']\n        loss_keypoints = self.loss_keypoints(\n            preds['keypoints2d'][keypoints2d_mask],\n            target_labels['keypoints2d_target'][keypoints2d_mask])\n\n        # orientations loss\n        loss_dir = self.loss_dir(preds['orientations'],\n                                 target_labels['orientations_target'])\n\n        # dimensions loss\n        loss_dims = self.loss_dims(preds['dimensions'],\n                                   target_labels['dimensions_target'])\n\n        # offsets for center heatmap\n        loss_offsets2d = self.loss_offsets2d(preds['offsets2d'],\n                                             target_labels['offsets2d_target'])\n\n        # directly regressed depth loss with direct depth uncertainty loss\n        direct_depth_weights = torch.exp(-preds['direct_depth_uncertainty'])\n        loss_weight_1 = self.loss_direct_depth.loss_weight\n        loss_direct_depth = self.loss_direct_depth(\n            preds['direct_depth'], target_labels['depth_target'],\n            direct_depth_weights)\n        loss_uncertainty_1 =\\\n            preds['direct_depth_uncertainty'] * loss_weight_1\n        loss_direct_depth = loss_direct_depth + loss_uncertainty_1.mean()\n\n        # keypoints decoded depth loss with keypoints depth uncertainty loss\n        depth_mask = target_labels['keypoints_depth_mask']\n        depth_target = target_labels['depth_target'].unsqueeze(-1).repeat(1, 3)\n        valid_keypoints_depth_uncertainty = preds[\n            'keypoints_depth_uncertainty'][depth_mask]\n        valid_keypoints_depth_weights = torch.exp(\n            -valid_keypoints_depth_uncertainty)\n        loss_keypoints_depth = self.loss_keypoints_depth(\n            preds['keypoints_depth'][depth_mask], depth_target[depth_mask],\n            valid_keypoints_depth_weights)\n        loss_weight_2 = self.loss_keypoints_depth.loss_weight\n        loss_uncertainty_2 =\\\n            valid_keypoints_depth_uncertainty * loss_weight_2\n        loss_keypoints_depth = loss_keypoints_depth + loss_uncertainty_2.mean()\n\n        # combined depth loss for optimizing the uncertainty\n        loss_combined_depth = self.loss_combined_depth(\n            preds['combined_depth'], target_labels['depth_target'])\n\n        loss_dict = dict(\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            loss_keypoints=loss_keypoints,\n            loss_dir=loss_dir,\n            loss_dims=loss_dims,\n            loss_offsets2d=loss_offsets2d,\n            loss_direct_depth=loss_direct_depth,\n            loss_keypoints_depth=loss_keypoints_depth,\n            loss_combined_depth=loss_combined_depth)\n\n        return loss_dict\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/parta2_rpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet3d.core import limit_period, xywhr2xyxyr\nfrom mmdet3d.core.post_processing import nms_bev, nms_normal_bev\nfrom ..builder import HEADS\nfrom .anchor3d_head import Anchor3DHead\n\n\n@HEADS.register_module()\nclass PartA2RPNHead(Anchor3DHead):\n    \"\"\"RPN head for PartA2.\n\n    Note:\n        The main difference between the PartA2 RPN head and the Anchor3DHead\n        lies in their output during inference. PartA2 RPN head further returns\n        the original classification score for the second stage since the bbox\n        head in RoI head does not do classification task.\n\n        Different from RPN heads in 2D detectors, this RPN head does\n        multi-class classification task and uses FocalLoss like the SECOND and\n        PointPillars do. But this head uses class agnostic nms rather than\n        multi-class nms.\n\n    Args:\n        num_classes (int): Number of classes.\n        in_channels (int): Number of channels in the input feature map.\n        train_cfg (dict): Train configs.\n        test_cfg (dict): Test configs.\n        feat_channels (int): Number of channels of the feature map.\n        use_direction_classifier (bool): Whether to add a direction classifier.\n        anchor_generator(dict): Config dict of anchor generator.\n        assigner_per_size (bool): Whether to do assignment for each separate\n            anchor size.\n        assign_per_class (bool): Whether to do assignment for each class.\n        diff_rad_by_sin (bool): Whether to change the difference into sin\n            difference for box regression loss.\n        dir_offset (float | int): The offset of BEV rotation angles\n            (TODO: may be moved into box coder)\n        dir_limit_offset (float | int): The limited range of BEV\n            rotation angles. 
(TODO: may be moved into box coder)\n        bbox_coder (dict): Config dict of box coders.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_dir (dict): Config of direction classifier loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 train_cfg,\n                 test_cfg,\n                 feat_channels=256,\n                 use_direction_classifier=True,\n                 anchor_generator=dict(\n                     type='Anchor3DRangeGenerator',\n                     range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],\n                     strides=[2],\n                     sizes=[[3.9, 1.6, 1.56]],\n                     rotations=[0, 1.57],\n                     custom_values=[],\n                     reshape_out=False),\n                 assigner_per_size=False,\n                 assign_per_class=False,\n                 diff_rad_by_sin=True,\n                 dir_offset=-np.pi / 2,\n                 dir_limit_offset=0,\n                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2),\n                 init_cfg=None):\n        super().__init__(num_classes, in_channels, train_cfg, test_cfg,\n                         feat_channels, use_direction_classifier,\n                         anchor_generator, assigner_per_size, assign_per_class,\n                         diff_rad_by_sin, dir_offset, dir_limit_offset,\n                         bbox_coder, loss_cls, loss_bbox, loss_dir, init_cfg)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate losses.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes\n                of each sample.\n            gt_labels (list[torch.Tensor]): Labels of each sample.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Classification, bbox, and\n                direction losses of each level.\n\n                - loss_rpn_cls (list[torch.Tensor]): Classification losses.\n                - loss_rpn_bbox (list[torch.Tensor]): Box regression losses.\n                - loss_rpn_dir (list[torch.Tensor]): Direction classification\n                    losses.\n        \"\"\"\n        loss_dict = super().loss(cls_scores, bbox_preds, dir_cls_preds,\n                                 gt_bboxes, gt_labels, input_metas,\n                                 gt_bboxes_ignore)\n        # change the loss key names to avoid conflict\n        return dict(\n            
loss_rpn_cls=loss_dict['loss_cls'],\n            loss_rpn_bbox=loss_dict['loss_bbox'],\n            loss_rpn_dir=loss_dict['loss_dir'])\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          dir_cls_preds,\n                          mlvl_anchors,\n                          input_meta,\n                          cfg,\n                          rescale=False):\n        \"\"\"Get bboxes of single branch.\n\n        Args:\n            cls_scores (torch.Tensor): Class score in single batch.\n            bbox_preds (torch.Tensor): Bbox prediction in single batch.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single batch.\n            mlvl_anchors (List[torch.Tensor]): Multi-level anchors\n                in single batch.\n            input_meta (list[dict]): Contain pcd and img's meta info.\n            cfg (:obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor]): whether th rescale bbox.\n\n        Returns:\n            dict: Predictions of single batch containing the following keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores_3d (torch.Tensor): Score of each bbox.\n                - labels_3d (torch.Tensor): Label of each bbox.\n                - cls_preds (torch.Tensor): Class score of each bbox.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_max_scores = []\n        mlvl_label_pred = []\n        mlvl_dir_scores = []\n        mlvl_cls_score = []\n        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(\n                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]\n            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.num_classes)\n\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2,\n                                          0).reshape(-1, self.box_code_size)\n\n            nms_pre = cfg.get('nms_pre', -1)\n            if self.use_sigmoid_cls:\n                max_scores, pred_labels = scores.max(dim=1)\n            else:\n                max_scores, pred_labels = scores[:, :-1].max(dim=1)\n            # get topk\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                topk_scores, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                max_scores = topk_scores\n                cls_score = scores[topk_inds, :]\n                dir_cls_score = dir_cls_score[topk_inds]\n                pred_labels = pred_labels[topk_inds]\n\n            bboxes = self.bbox_coder.decode(anchors, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_max_scores.append(max_scores)\n            mlvl_cls_score.append(cls_score)\n            mlvl_label_pred.append(pred_labels)\n            mlvl_dir_scores.append(dir_cls_score)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        
mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.box_code_size).bev)\n        mlvl_max_scores = torch.cat(mlvl_max_scores)\n        mlvl_label_pred = torch.cat(mlvl_label_pred)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n        # shape [k, num_class] before sigmoid\n        # PartA2 need to keep raw classification score\n        # because the bbox head in the second stage does not have\n        # classification branch,\n        # roi head need this score as classification score\n        mlvl_cls_score = torch.cat(mlvl_cls_score)\n\n        score_thr = cfg.get('score_thr', 0)\n        result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                         mlvl_max_scores, mlvl_label_pred,\n                                         mlvl_cls_score, mlvl_dir_scores,\n                                         score_thr, cfg.nms_post, cfg,\n                                         input_meta)\n\n        return result\n\n    def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,\n                           mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,\n                           mlvl_dir_scores, score_thr, max_num, cfg,\n                           input_meta):\n        \"\"\"Class agnostic nms for single batch.\n\n        Args:\n            mlvl_bboxes (torch.Tensor): Bboxes from Multi-level.\n            mlvl_bboxes_for_nms (torch.Tensor): Bboxes for nms\n                (bev or minmax boxes) from Multi-level.\n            mlvl_max_scores (torch.Tensor): Max scores of Multi-level bbox.\n            mlvl_label_pred (torch.Tensor): Class predictions\n                of Multi-level bbox.\n            mlvl_cls_score (torch.Tensor): Class scores of\n                Multi-level bbox.\n            mlvl_dir_scores (torch.Tensor): Direction scores of\n                Multi-level bbox.\n            score_thr (int): Score threshold.\n            max_num (int): Max number of bboxes after nms.\n            cfg (:obj:`ConfigDict`): Training or testing config.\n            input_meta (dict): Contain pcd and img's meta info.\n\n        Returns:\n            dict: Predictions of single batch. 
Contain the keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores_3d (torch.Tensor): Score of each bbox.\n                - labels_3d (torch.Tensor): Label of each bbox.\n                - cls_preds (torch.Tensor): Class score of each bbox.\n        \"\"\"\n        bboxes = []\n        scores = []\n        labels = []\n        dir_scores = []\n        cls_scores = []\n        score_thr_inds = mlvl_max_scores > score_thr\n        _scores = mlvl_max_scores[score_thr_inds]\n        _bboxes_for_nms = mlvl_bboxes_for_nms[score_thr_inds, :]\n        if cfg.use_rotate_nms:\n            nms_func = nms_bev\n        else:\n            nms_func = nms_normal_bev\n        selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)\n\n        _mlvl_bboxes = mlvl_bboxes[score_thr_inds, :]\n        _mlvl_dir_scores = mlvl_dir_scores[score_thr_inds]\n        _mlvl_label_pred = mlvl_label_pred[score_thr_inds]\n        _mlvl_cls_score = mlvl_cls_score[score_thr_inds]\n\n        if len(selected) > 0:\n            bboxes.append(_mlvl_bboxes[selected])\n            scores.append(_scores[selected])\n            labels.append(_mlvl_label_pred[selected])\n            cls_scores.append(_mlvl_cls_score[selected])\n            dir_scores.append(_mlvl_dir_scores[selected])\n            dir_rot = limit_period(bboxes[-1][..., 6] - self.dir_offset,\n                                   self.dir_limit_offset, np.pi)\n            bboxes[-1][..., 6] = (\n                dir_rot + self.dir_offset +\n                np.pi * dir_scores[-1].to(bboxes[-1].dtype))\n\n        if bboxes:\n            bboxes = torch.cat(bboxes, dim=0)\n            scores = torch.cat(scores, dim=0)\n            cls_scores = torch.cat(cls_scores, dim=0)\n            labels = torch.cat(labels, dim=0)\n            if bboxes.shape[0] > max_num:\n                _, inds = scores.sort(descending=True)\n                inds = inds[:max_num]\n                bboxes = bboxes[inds, :]\n                labels = labels[inds]\n                scores = scores[inds]\n                cls_scores = cls_scores[inds]\n            bboxes = input_meta['box_type_3d'](\n                bboxes, box_dim=self.box_code_size)\n            return dict(\n                boxes_3d=bboxes,\n                scores_3d=scores,\n                labels_3d=labels,\n                cls_preds=cls_scores  # raw scores [max_num, cls_num]\n            )\n        else:\n            return dict(\n                boxes_3d=input_meta['box_type_3d'](\n                    mlvl_bboxes.new_zeros([0, self.box_code_size]),\n                    box_dim=self.box_code_size),\n                scores_3d=mlvl_bboxes.new_zeros([0]),\n                labels_3d=mlvl_bboxes.new_zeros([0]),\n                cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/pgd_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.cnn import Scale, bias_init_with_prob, normal_init\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import box3d_multiclass_nms, xywhr2xyxyr\nfrom mmdet3d.core.bbox import points_cam2img, points_img2cam\nfrom mmdet.core import distance2bbox, multi_apply\nfrom ..builder import HEADS, build_loss\nfrom .fcos_mono3d_head import FCOSMono3DHead\n\n\n@HEADS.register_module()\nclass PGDHead(FCOSMono3DHead):\n    r\"\"\"Anchor-free head used in `PGD <https://arxiv.org/abs/2107.14160>`_.\n\n    Args:\n        use_depth_classifer (bool, optional): Whether to use depth classifier.\n            Defaults to True.\n        use_only_reg_proj (bool, optional): Whether to use only direct\n            regressed depth in the re-projection (to make the network easier\n            to learn). Defaults to False.\n        weight_dim (int, optional): Dimension of the location-aware weight\n            map. Defaults to -1.\n        weight_branch (tuple[tuple[int]], optional): Feature map channels of\n            the convolutional branch for weight map. Defaults to ((256, ), ).\n        depth_branch (tuple[int], optional): Feature map channels of the\n            branch for probabilistic depth estimation. Defaults to (64, ),\n        depth_range (tuple[float], optional): Range of depth estimation.\n            Defaults to (0, 70),\n        depth_unit (int, optional): Unit of depth range division. Defaults to\n            10.\n        division (str, optional): Depth division method. Options include\n            'uniform', 'linear', 'log', 'loguniform'. Defaults to 'uniform'.\n        depth_bins (int, optional): Discrete bins of depth division. Defaults\n            to 8.\n        loss_depth (dict, optional): Depth loss. Defaults to dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0).\n        loss_bbox2d (dict, optional): Loss for 2D box estimation. Defaults to\n            dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0).\n        loss_consistency (dict, optional): Consistency loss. Defaults to\n            dict(type='GIoULoss', loss_weight=1.0),\n        pred_velo (bool, optional): Whether to predict velocity. Defaults to\n            False.\n        pred_bbox2d (bool, optional): Whether to predict 2D bounding boxes.\n            Defaults to True.\n        pred_keypoints (bool, optional): Whether to predict keypoints.\n            Defaults to False,\n        bbox_coder (dict, optional): Bounding box coder. 
Defaults to\n            dict(type='PGDBBoxCoder', base_depths=((28.01, 16.32), ),\n            base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6), (3.9, 1.56, 1.6)),\n            code_size=7).\n    \"\"\"\n\n    def __init__(self,\n                 use_depth_classifier=True,\n                 use_onlyreg_proj=False,\n                 weight_dim=-1,\n                 weight_branch=((256, ), ),\n                 depth_branch=(64, ),\n                 depth_range=(0, 70),\n                 depth_unit=10,\n                 division='uniform',\n                 depth_bins=8,\n                 loss_depth=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n                 loss_bbox2d=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n                 loss_consistency=dict(type='GIoULoss', loss_weight=1.0),\n                 pred_bbox2d=True,\n                 pred_keypoints=False,\n                 bbox_coder=dict(\n                     type='PGDBBoxCoder',\n                     base_depths=((28.01, 16.32), ),\n                     base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),\n                                (3.9, 1.56, 1.6)),\n                     code_size=7),\n                 **kwargs):\n        self.use_depth_classifier = use_depth_classifier\n        self.use_onlyreg_proj = use_onlyreg_proj\n        self.depth_branch = depth_branch\n        self.pred_keypoints = pred_keypoints\n        self.weight_dim = weight_dim\n        self.weight_branch = weight_branch\n        self.weight_out_channels = []\n        for weight_branch_channels in weight_branch:\n            if len(weight_branch_channels) > 0:\n                self.weight_out_channels.append(weight_branch_channels[-1])\n            else:\n                self.weight_out_channels.append(-1)\n        self.depth_range = depth_range\n        self.depth_unit = depth_unit\n        self.division = division\n        if self.division == 'uniform':\n            self.num_depth_cls = int(\n                (depth_range[1] - depth_range[0]) / depth_unit) + 1\n            if self.num_depth_cls != depth_bins:\n                print('Warning: The number of bins computed from ' +\n                      'depth_unit is different from given parameter! 
' +\n                      'Depth_unit will be considered with priority in ' +\n                      'Uniform Division.')\n        else:\n            self.num_depth_cls = depth_bins\n        super().__init__(\n            pred_bbox2d=pred_bbox2d, bbox_coder=bbox_coder, **kwargs)\n        self.loss_depth = build_loss(loss_depth)\n        if self.pred_bbox2d:\n            self.loss_bbox2d = build_loss(loss_bbox2d)\n            self.loss_consistency = build_loss(loss_consistency)\n        if self.pred_keypoints:\n            self.kpts_start = 9 if self.pred_velo else 7\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        super()._init_layers()\n        if self.pred_bbox2d:\n            self.scale_dim += 1\n        if self.pred_keypoints:\n            self.scale_dim += 1\n        self.scales = nn.ModuleList([\n            nn.ModuleList([Scale(1.0) for _ in range(self.scale_dim)])\n            for _ in self.strides\n        ])\n\n    def _init_predictor(self):\n        \"\"\"Initialize predictor layers of the head.\"\"\"\n        super()._init_predictor()\n\n        if self.use_depth_classifier:\n            self.conv_depth_cls_prev = self._init_branch(\n                conv_channels=self.depth_branch,\n                conv_strides=(1, ) * len(self.depth_branch))\n            self.conv_depth_cls = nn.Conv2d(self.depth_branch[-1],\n                                            self.num_depth_cls, 1)\n            # Data-agnostic single param lambda for local depth fusion\n            self.fuse_lambda = nn.Parameter(torch.tensor(10e-5))\n\n        if self.weight_dim != -1:\n            self.conv_weight_prevs = nn.ModuleList()\n            self.conv_weights = nn.ModuleList()\n            for i in range(self.weight_dim):\n                weight_branch_channels = self.weight_branch[i]\n                weight_out_channel = self.weight_out_channels[i]\n                if len(weight_branch_channels) > 0:\n                    self.conv_weight_prevs.append(\n                        self._init_branch(\n                            conv_channels=weight_branch_channels,\n                            conv_strides=(1, ) * len(weight_branch_channels)))\n                    self.conv_weights.append(\n                        nn.Conv2d(weight_out_channel, 1, 1))\n                else:\n                    self.conv_weight_prevs.append(None)\n                    self.conv_weights.append(\n                        nn.Conv2d(self.feat_channels, 1, 1))\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\n\n        We currently still use the customized defined init_weights because the\n        default init of DCN triggered by the init_cfg will init\n        conv_offset.weight, which mistakenly affects the training stability.\n        \"\"\"\n        super().init_weights()\n\n        bias_cls = bias_init_with_prob(0.01)\n        if self.use_depth_classifier:\n            for m in self.conv_depth_cls_prev:\n                if isinstance(m.conv, nn.Conv2d):\n                    normal_init(m.conv, std=0.01)\n            normal_init(self.conv_depth_cls, std=0.01, bias=bias_cls)\n\n        if self.weight_dim != -1:\n            for conv_weight_prev in self.conv_weight_prevs:\n                if conv_weight_prev is None:\n                    continue\n                for m in conv_weight_prev:\n                    if isinstance(m.conv, nn.Conv2d):\n                        normal_init(m.conv, std=0.01)\n            for conv_weight in self.conv_weights:\n             
   normal_init(conv_weight, std=0.01)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple:\n                cls_scores (list[Tensor]): Box scores for each scale level,\n                    each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * bbox_code_size.\n                dir_cls_preds (list[Tensor]): Box scores for direction class\n                    predictions on each scale level, each is a 4D-tensor,\n                    the channel number is num_points * 2. (bin = 2).\n                weight (list[Tensor]): Location-aware weight maps on each\n                    scale level, each is a 4D-tensor, the channel number is\n                    num_points * 1.\n                depth_cls_preds (list[Tensor]): Box scores for depth class\n                    predictions on each scale level, each is a 4D-tensor,\n                    the channel number is num_points * self.num_depth_cls.\n                attr_preds (list[Tensor]): Attribute scores for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * num_attrs.\n                centernesses (list[Tensor]): Centerness for each scale level,\n                    each is a 4D-tensor, the channel number is num_points * 1.\n        \"\"\"\n        return multi_apply(self.forward_single, feats, self.scales,\n                           self.strides)\n\n    def forward_single(self, x, scale, stride):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n            stride (int): The corresponding stride for feature maps, only\n                used to normalize the bbox prediction when self.norm_on_bbox\n                is True.\n\n        Returns:\n            tuple: scores for each class, bbox and direction class\n                predictions, depth class predictions, location-aware weights,\n                attribute and centerness predictions of input feature maps.\n        \"\"\"\n        cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, cls_feat, \\\n            reg_feat = super().forward_single(x, scale, stride)\n\n        max_regress_range = stride * self.regress_ranges[0][1] / \\\n            self.strides[0]\n        bbox_pred = self.bbox_coder.decode_2d(bbox_pred, scale, stride,\n                                              max_regress_range, self.training,\n                                              self.pred_keypoints,\n                                              self.pred_bbox2d)\n\n        depth_cls_pred = None\n        if self.use_depth_classifier:\n            clone_reg_feat = reg_feat.clone()\n            for conv_depth_cls_prev_layer in self.conv_depth_cls_prev:\n                clone_reg_feat = conv_depth_cls_prev_layer(clone_reg_feat)\n            depth_cls_pred = self.conv_depth_cls(clone_reg_feat)\n\n        weight = None\n        if self.weight_dim != -1:\n            weight = []\n            for i in range(self.weight_dim):\n      
          clone_reg_feat = reg_feat.clone()\n                if len(self.weight_branch[i]) > 0:\n                    for conv_weight_prev_layer in self.conv_weight_prevs[i]:\n                        clone_reg_feat = conv_weight_prev_layer(clone_reg_feat)\n                weight.append(self.conv_weights[i](clone_reg_feat))\n            weight = torch.cat(weight, dim=1)\n\n        return cls_score, bbox_pred, dir_cls_pred, depth_cls_pred, weight, \\\n            attr_pred, centerness\n\n    def get_proj_bbox2d(self,\n                        bbox_preds,\n                        pos_dir_cls_preds,\n                        labels_3d,\n                        bbox_targets_3d,\n                        pos_points,\n                        pos_inds,\n                        img_metas,\n                        pos_depth_cls_preds=None,\n                        pos_weights=None,\n                        pos_cls_scores=None,\n                        with_kpts=False):\n        \"\"\"Decode box predictions and get projected 2D attributes.\n\n        Args:\n            bbox_preds (list[Tensor]): Box predictions for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * bbox_code_size.\n            pos_dir_cls_preds (Tensor): Box scores for direction class\n                predictions of positive boxes on all the scale levels in shape\n                (num_pos_points, 2).\n            labels_3d (list[Tensor]): 3D box category labels for each scale\n                level, each is a 4D-tensor.\n            bbox_targets_3d (list[Tensor]): 3D box targets for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * bbox_code_size.\n            pos_points (Tensor): Foreground points.\n            pos_inds (Tensor): Index of foreground points from flattened\n                tensors.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            pos_depth_cls_preds (Tensor, optional): Probabilistic depth map of\n                positive boxes on all the scale levels in shape\n                (num_pos_points, self.num_depth_cls). Defaults to None.\n            pos_weights (Tensor, optional): Location-aware weights of positive\n                boxes in shape (num_pos_points, self.weight_dim). 
Defaults to\n                None.\n            pos_cls_scores (Tensor, optional): Classification scores of\n                positive boxes in shape (num_pos_points, self.num_classes).\n                Defaults to None.\n            with_kpts (bool, optional): Whether to output keypoints targets.\n                Defaults to False.\n\n        Returns:\n            tuple[Tensor]: Exterior 2D boxes from projected 3D boxes,\n                predicted 2D boxes and keypoint targets (if necessary).\n        \"\"\"\n        views = [np.array(img_meta['cam2img']) for img_meta in img_metas]\n        num_imgs = len(img_metas)\n        img_idx = []\n        for label in labels_3d:\n            for idx in range(num_imgs):\n                img_idx.append(\n                    labels_3d[0].new_ones(int(len(label) / num_imgs)) * idx)\n        img_idx = torch.cat(img_idx)\n        pos_img_idx = img_idx[pos_inds]\n\n        flatten_strided_bbox_preds = []\n        flatten_strided_bbox2d_preds = []\n        flatten_bbox_targets_3d = []\n        flatten_strides = []\n\n        for stride_idx, bbox_pred in enumerate(bbox_preds):\n            flatten_bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(\n                -1, sum(self.group_reg_dims))\n            flatten_bbox_pred[:, :2] *= self.strides[stride_idx]\n            flatten_bbox_pred[:, -4:] *= self.strides[stride_idx]\n            flatten_strided_bbox_preds.append(\n                flatten_bbox_pred[:, :self.bbox_coder.bbox_code_size])\n            flatten_strided_bbox2d_preds.append(flatten_bbox_pred[:, -4:])\n\n            bbox_target_3d = bbox_targets_3d[stride_idx].clone()\n            bbox_target_3d[:, :2] *= self.strides[stride_idx]\n            bbox_target_3d[:, -4:] *= self.strides[stride_idx]\n            flatten_bbox_targets_3d.append(bbox_target_3d)\n\n            flatten_stride = flatten_bbox_pred.new_ones(\n                *flatten_bbox_pred.shape[:-1], 1) * self.strides[stride_idx]\n            flatten_strides.append(flatten_stride)\n\n        flatten_strided_bbox_preds = torch.cat(flatten_strided_bbox_preds)\n        flatten_strided_bbox2d_preds = torch.cat(flatten_strided_bbox2d_preds)\n        flatten_bbox_targets_3d = torch.cat(flatten_bbox_targets_3d)\n        flatten_strides = torch.cat(flatten_strides)\n        pos_strided_bbox_preds = flatten_strided_bbox_preds[pos_inds]\n        pos_strided_bbox2d_preds = flatten_strided_bbox2d_preds[pos_inds]\n        pos_bbox_targets_3d = flatten_bbox_targets_3d[pos_inds]\n        pos_strides = flatten_strides[pos_inds]\n\n        pos_decoded_bbox2d_preds = distance2bbox(pos_points,\n                                                 pos_strided_bbox2d_preds)\n\n        pos_strided_bbox_preds[:, :2] = \\\n            pos_points - pos_strided_bbox_preds[:, :2]\n        pos_bbox_targets_3d[:, :2] = \\\n            pos_points - pos_bbox_targets_3d[:, :2]\n\n        if self.use_depth_classifier and (not self.use_onlyreg_proj):\n            pos_prob_depth_preds = self.bbox_coder.decode_prob_depth(\n                pos_depth_cls_preds, self.depth_range, self.depth_unit,\n                self.division, self.num_depth_cls)\n            sig_alpha = torch.sigmoid(self.fuse_lambda)\n            pos_strided_bbox_preds[:, 2] = \\\n                sig_alpha * pos_strided_bbox_preds.clone()[:, 2] + \\\n                (1 - sig_alpha) * pos_prob_depth_preds\n\n        box_corners_in_image = pos_strided_bbox_preds.new_zeros(\n            (*pos_strided_bbox_preds.shape[:-1], 8, 2))\n        
box_corners_in_image_gt = pos_strided_bbox_preds.new_zeros(\n            (*pos_strided_bbox_preds.shape[:-1], 8, 2))\n\n        for idx in range(num_imgs):\n            mask = (pos_img_idx == idx)\n            if pos_strided_bbox_preds[mask].shape[0] == 0:\n                continue\n            cam2img = torch.eye(\n                4,\n                dtype=pos_strided_bbox_preds.dtype,\n                device=pos_strided_bbox_preds.device)\n            view_shape = views[idx].shape\n            cam2img[:view_shape[0], :view_shape[1]] = \\\n                pos_strided_bbox_preds.new_tensor(views[idx])\n\n            centers2d_preds = pos_strided_bbox_preds.clone()[mask, :2]\n            centers2d_targets = pos_bbox_targets_3d.clone()[mask, :2]\n            centers3d_targets = points_img2cam(pos_bbox_targets_3d[mask, :3],\n                                               views[idx])\n\n            # use predicted depth to re-project the 2.5D centers\n            pos_strided_bbox_preds[mask, :3] = points_img2cam(\n                pos_strided_bbox_preds[mask, :3], views[idx])\n            pos_bbox_targets_3d[mask, :3] = centers3d_targets\n\n            # depth fixed when computing re-project 3D bboxes\n            pos_strided_bbox_preds[mask, 2] = \\\n                pos_bbox_targets_3d.clone()[mask, 2]\n\n            # decode yaws\n            if self.use_direction_classifier:\n                pos_dir_cls_scores = torch.max(\n                    pos_dir_cls_preds[mask], dim=-1)[1]\n                pos_strided_bbox_preds[mask] = self.bbox_coder.decode_yaw(\n                    pos_strided_bbox_preds[mask], centers2d_preds,\n                    pos_dir_cls_scores, self.dir_offset, cam2img)\n            pos_bbox_targets_3d[mask, 6] = torch.atan2(\n                centers2d_targets[:, 0] - cam2img[0, 2],\n                cam2img[0, 0]) + pos_bbox_targets_3d[mask, 6]\n\n            corners = img_metas[0]['box_type_3d'](\n                pos_strided_bbox_preds[mask],\n                box_dim=self.bbox_coder.bbox_code_size,\n                origin=(0.5, 0.5, 0.5)).corners\n            box_corners_in_image[mask] = points_cam2img(corners, cam2img)\n\n            corners_gt = img_metas[0]['box_type_3d'](\n                pos_bbox_targets_3d[mask, :self.bbox_code_size],\n                box_dim=self.bbox_coder.bbox_code_size,\n                origin=(0.5, 0.5, 0.5)).corners\n            box_corners_in_image_gt[mask] = points_cam2img(corners_gt, cam2img)\n\n        minxy = torch.min(box_corners_in_image, dim=1)[0]\n        maxxy = torch.max(box_corners_in_image, dim=1)[0]\n        proj_bbox2d_preds = torch.cat([minxy, maxxy], dim=1)\n\n        outputs = (proj_bbox2d_preds, pos_decoded_bbox2d_preds)\n\n        if with_kpts:\n            norm_strides = pos_strides * self.regress_ranges[0][1] / \\\n                self.strides[0]\n            kpts_targets = box_corners_in_image_gt - pos_points[..., None, :]\n            kpts_targets = kpts_targets.view(\n                (*pos_strided_bbox_preds.shape[:-1], 16))\n            kpts_targets /= norm_strides\n\n            outputs += (kpts_targets, )\n\n        return outputs\n\n    def get_pos_predictions(self, bbox_preds, dir_cls_preds, depth_cls_preds,\n                            weights, attr_preds, centernesses, pos_inds,\n                            img_metas):\n        \"\"\"Flatten predictions and get positive ones.\n\n        Args:\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, 
the channel number is\n                num_points * bbox_code_size.\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. (bin = 2)\n            depth_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * self.num_depth_cls.\n            attr_preds (list[Tensor]): Attribute scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_attrs.\n            centernesses (list[Tensor]): Centerness for each scale level, each\n                is a 4D-tensor, the channel number is num_points * 1.\n            pos_inds (Tensor): Index of foreground points from flattened\n                tensors.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Returns:\n            tuple[Tensor]: Box predictions, direction classes, probabilistic\n                depth maps, location-aware weight maps, attributes and\n                centerness predictions.\n        \"\"\"\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(-1, sum(self.group_reg_dims))\n            for bbox_pred in bbox_preds\n        ]\n        flatten_dir_cls_preds = [\n            dir_cls_pred.permute(0, 2, 3, 1).reshape(-1, 2)\n            for dir_cls_pred in dir_cls_preds\n        ]\n        flatten_centerness = [\n            centerness.permute(0, 2, 3, 1).reshape(-1)\n            for centerness in centernesses\n        ]\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds)\n        flatten_dir_cls_preds = torch.cat(flatten_dir_cls_preds)\n        flatten_centerness = torch.cat(flatten_centerness)\n        pos_bbox_preds = flatten_bbox_preds[pos_inds]\n        pos_dir_cls_preds = flatten_dir_cls_preds[pos_inds]\n        pos_centerness = flatten_centerness[pos_inds]\n\n        pos_depth_cls_preds = None\n        if self.use_depth_classifier:\n            flatten_depth_cls_preds = [\n                depth_cls_pred.permute(0, 2, 3,\n                                       1).reshape(-1, self.num_depth_cls)\n                for depth_cls_pred in depth_cls_preds\n            ]\n            flatten_depth_cls_preds = torch.cat(flatten_depth_cls_preds)\n            pos_depth_cls_preds = flatten_depth_cls_preds[pos_inds]\n\n        pos_weights = None\n        if self.weight_dim != -1:\n            flatten_weights = [\n                weight.permute(0, 2, 3, 1).reshape(-1, self.weight_dim)\n                for weight in weights\n            ]\n            flatten_weights = torch.cat(flatten_weights)\n            pos_weights = flatten_weights[pos_inds]\n\n        pos_attr_preds = None\n        if self.pred_attrs:\n            flatten_attr_preds = [\n                attr_pred.permute(0, 2, 3, 1).reshape(-1, self.num_attrs)\n                for attr_pred in attr_preds\n            ]\n            flatten_attr_preds = torch.cat(flatten_attr_preds)\n            pos_attr_preds = flatten_attr_preds[pos_inds]\n\n        return pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, \\\n            pos_weights, pos_attr_preds, pos_centerness\n\n    @force_fp32(\n        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',\n                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))\n    def 
loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             depth_cls_preds,\n             weights,\n             attr_preds,\n             centernesses,\n             gt_bboxes,\n             gt_labels,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             centers2d,\n             depths,\n             attr_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * bbox_code_size.\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. (bin = 2)\n            depth_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * self.num_depth_cls.\n            weights (list[Tensor]): Location-aware weights for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * self.weight_dim.\n            attr_preds (list[Tensor]): Attribute scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_attrs.\n            centernesses (list[Tensor]): Centerness for each scale level, each\n                is a 4D-tensor, the channel number is num_points * 1.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_3d (list[Tensor]): 3D boxes ground truth with shape of\n                (num_gts, code_size).\n            gt_labels_3d (list[Tensor]): same as gt_labels\n            centers2d (list[Tensor]): 2D centers on the image with shape of\n                (num_gts, 2).\n            depths (list[Tensor]): Depth ground truth with shape of\n                (num_gts, ).\n            attr_labels (list[Tensor]): Attributes indices of each box.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor]): specify which bounding boxes can\n                be ignored when computing the loss. 
Defaults to None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \\\n            len(depth_cls_preds) == len(weights) == len(centernesses) == \\\n            len(attr_preds), 'The length of cls_scores, bbox_preds, ' \\\n            'dir_cls_preds, depth_cls_preds, weights, centernesses, and' \\\n            f'attr_preds: {len(cls_scores)}, {len(bbox_preds)}, ' \\\n            f'{len(dir_cls_preds)}, {len(depth_cls_preds)}, {len(weights)}' \\\n            f'{len(centernesses)}, {len(attr_preds)} are inconsistent.'\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,\n                                           bbox_preds[0].device)\n        labels_3d, bbox_targets_3d, centerness_targets, attr_targets = \\\n            self.get_targets(\n                all_level_points, gt_bboxes, gt_labels, gt_bboxes_3d,\n                gt_labels_3d, centers2d, depths, attr_labels)\n\n        num_imgs = cls_scores[0].size(0)\n        # flatten cls_scores and targets\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        flatten_cls_scores = torch.cat(flatten_cls_scores)\n        flatten_labels_3d = torch.cat(labels_3d)\n        flatten_bbox_targets_3d = torch.cat(bbox_targets_3d)\n        flatten_centerness_targets = torch.cat(centerness_targets)\n        flatten_points = torch.cat(\n            [points.repeat(num_imgs, 1) for points in all_level_points])\n        if self.pred_attrs:\n            flatten_attr_targets = torch.cat(attr_targets)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((flatten_labels_3d >= 0)\n                    & (flatten_labels_3d < bg_class_ind)).nonzero().reshape(-1)\n        num_pos = len(pos_inds)\n\n        loss_dict = dict()\n\n        loss_dict['loss_cls'] = self.loss_cls(\n            flatten_cls_scores,\n            flatten_labels_3d,\n            avg_factor=num_pos + num_imgs)  # avoid num_pos is 0\n\n        pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, pos_weights, \\\n            pos_attr_preds, pos_centerness = self.get_pos_predictions(\n                bbox_preds, dir_cls_preds, depth_cls_preds, weights,\n                attr_preds, centernesses, pos_inds, img_metas)\n\n        if num_pos > 0:\n            pos_bbox_targets_3d = flatten_bbox_targets_3d[pos_inds]\n            pos_centerness_targets = flatten_centerness_targets[pos_inds]\n            pos_points = flatten_points[pos_inds]\n            if self.pred_attrs:\n                pos_attr_targets = flatten_attr_targets[pos_inds]\n            if self.use_direction_classifier:\n                pos_dir_cls_targets = self.get_direction_target(\n                    pos_bbox_targets_3d, self.dir_offset, one_hot=False)\n\n            bbox_weights = pos_centerness_targets.new_ones(\n                len(pos_centerness_targets), sum(self.group_reg_dims))\n            equal_weights = pos_centerness_targets.new_ones(\n                pos_centerness_targets.shape)\n            code_weight = self.train_cfg.get('code_weight', None)\n            if code_weight:\n                assert len(code_weight) == sum(self.group_reg_dims)\n                bbox_weights = bbox_weights * 
bbox_weights.new_tensor(\n                    code_weight)\n\n            if self.diff_rad_by_sin:\n                pos_bbox_preds, pos_bbox_targets_3d = self.add_sin_difference(\n                    pos_bbox_preds, pos_bbox_targets_3d)\n\n            loss_dict['loss_offset'] = self.loss_bbox(\n                pos_bbox_preds[:, :2],\n                pos_bbox_targets_3d[:, :2],\n                weight=bbox_weights[:, :2],\n                avg_factor=equal_weights.sum())\n            loss_dict['loss_size'] = self.loss_bbox(\n                pos_bbox_preds[:, 3:6],\n                pos_bbox_targets_3d[:, 3:6],\n                weight=bbox_weights[:, 3:6],\n                avg_factor=equal_weights.sum())\n            loss_dict['loss_rotsin'] = self.loss_bbox(\n                pos_bbox_preds[:, 6],\n                pos_bbox_targets_3d[:, 6],\n                weight=bbox_weights[:, 6],\n                avg_factor=equal_weights.sum())\n            if self.pred_velo:\n                loss_dict['loss_velo'] = self.loss_bbox(\n                    pos_bbox_preds[:, 7:9],\n                    pos_bbox_targets_3d[:, 7:9],\n                    weight=bbox_weights[:, 7:9],\n                    avg_factor=equal_weights.sum())\n\n            proj_bbox2d_inputs = (bbox_preds, pos_dir_cls_preds, labels_3d,\n                                  bbox_targets_3d, pos_points, pos_inds,\n                                  img_metas)\n\n            # direction classification loss\n            # TODO: add more check for use_direction_classifier\n            if self.use_direction_classifier:\n                loss_dict['loss_dir'] = self.loss_dir(\n                    pos_dir_cls_preds,\n                    pos_dir_cls_targets,\n                    equal_weights,\n                    avg_factor=equal_weights.sum())\n\n            # init depth loss with the one computed from direct regression\n            loss_dict['loss_depth'] = self.loss_bbox(\n                pos_bbox_preds[:, 2],\n                pos_bbox_targets_3d[:, 2],\n                weight=bbox_weights[:, 2],\n                avg_factor=equal_weights.sum())\n            # depth classification loss\n            if self.use_depth_classifier:\n                pos_prob_depth_preds = self.bbox_coder.decode_prob_depth(\n                    pos_depth_cls_preds, self.depth_range, self.depth_unit,\n                    self.division, self.num_depth_cls)\n                sig_alpha = torch.sigmoid(self.fuse_lambda)\n                if self.weight_dim != -1:\n                    loss_fuse_depth = self.loss_depth(\n                        sig_alpha * pos_bbox_preds[:, 2] +\n                        (1 - sig_alpha) * pos_prob_depth_preds,\n                        pos_bbox_targets_3d[:, 2],\n                        sigma=pos_weights[:, 0],\n                        weight=bbox_weights[:, 2],\n                        avg_factor=equal_weights.sum())\n                else:\n                    loss_fuse_depth = self.loss_depth(\n                        sig_alpha * pos_bbox_preds[:, 2] +\n                        (1 - sig_alpha) * pos_prob_depth_preds,\n                        pos_bbox_targets_3d[:, 2],\n                        weight=bbox_weights[:, 2],\n                        avg_factor=equal_weights.sum())\n                loss_dict['loss_depth'] = loss_fuse_depth\n\n                proj_bbox2d_inputs += (pos_depth_cls_preds, )\n\n            if self.pred_keypoints:\n                # use smoothL1 to compute consistency loss for keypoints\n                # normalize the offsets 
with strides\n                proj_bbox2d_preds, pos_decoded_bbox2d_preds, kpts_targets = \\\n                    self.get_proj_bbox2d(*proj_bbox2d_inputs, with_kpts=True)\n                loss_dict['loss_kpts'] = self.loss_bbox(\n                    pos_bbox_preds[:, self.kpts_start:self.kpts_start + 16],\n                    kpts_targets,\n                    weight=bbox_weights[:,\n                                        self.kpts_start:self.kpts_start + 16],\n                    avg_factor=equal_weights.sum())\n\n            if self.pred_bbox2d:\n                loss_dict['loss_bbox2d'] = self.loss_bbox2d(\n                    pos_bbox_preds[:, -4:],\n                    pos_bbox_targets_3d[:, -4:],\n                    weight=bbox_weights[:, -4:],\n                    avg_factor=equal_weights.sum())\n                if not self.pred_keypoints:\n                    proj_bbox2d_preds, pos_decoded_bbox2d_preds = \\\n                        self.get_proj_bbox2d(*proj_bbox2d_inputs)\n                loss_dict['loss_consistency'] = self.loss_consistency(\n                    proj_bbox2d_preds,\n                    pos_decoded_bbox2d_preds,\n                    weight=bbox_weights[:, -4:],\n                    avg_factor=equal_weights.sum())\n\n            loss_dict['loss_centerness'] = self.loss_centerness(\n                pos_centerness, pos_centerness_targets)\n\n            # attribute classification loss\n            if self.pred_attrs:\n                loss_dict['loss_attr'] = self.loss_attr(\n                    pos_attr_preds,\n                    pos_attr_targets,\n                    pos_centerness_targets,\n                    avg_factor=pos_centerness_targets.sum())\n\n        else:\n            # need absolute due to possible negative delta x/y\n            loss_dict['loss_offset'] = pos_bbox_preds[:, :2].sum()\n            loss_dict['loss_size'] = pos_bbox_preds[:, 3:6].sum()\n            loss_dict['loss_rotsin'] = pos_bbox_preds[:, 6].sum()\n            loss_dict['loss_depth'] = pos_bbox_preds[:, 2].sum()\n            if self.pred_velo:\n                loss_dict['loss_velo'] = pos_bbox_preds[:, 7:9].sum()\n            if self.pred_keypoints:\n                loss_dict['loss_kpts'] = pos_bbox_preds[:,\n                                                        self.kpts_start:self.\n                                                        kpts_start + 16].sum()\n            if self.pred_bbox2d:\n                loss_dict['loss_bbox2d'] = pos_bbox_preds[:, -4:].sum()\n                loss_dict['loss_consistency'] = pos_bbox_preds[:, -4:].sum()\n            loss_dict['loss_centerness'] = pos_centerness.sum()\n            if self.use_direction_classifier:\n                loss_dict['loss_dir'] = pos_dir_cls_preds.sum()\n            if self.use_depth_classifier:\n                sig_alpha = torch.sigmoid(self.fuse_lambda)\n                loss_fuse_depth = \\\n                    sig_alpha * pos_bbox_preds[:, 2].sum() + \\\n                    (1 - sig_alpha) * pos_depth_cls_preds.sum()\n                if self.weight_dim != -1:\n                    loss_fuse_depth *= torch.exp(-pos_weights[:, 0].sum())\n                loss_dict['loss_depth'] = loss_fuse_depth\n            if self.pred_attrs:\n                loss_dict['loss_attr'] = pos_attr_preds.sum()\n\n        return loss_dict\n\n    @force_fp32(\n        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',\n                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))\n    def get_bboxes(self,\n              
     cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   depth_cls_preds,\n                   weights,\n                   attr_preds,\n                   centernesses,\n                   img_metas,\n                   cfg=None,\n                   rescale=None):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_points * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_points * 4, H, W)\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * 2. (bin = 2)\n            depth_cls_preds (list[Tensor]): Box scores for probabilistic depth\n                predictions on each scale level, each is a 4D-tensor,\n                the channel number is num_points * self.num_depth_cls.\n            weights (list[Tensor]): Location-aware weights for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * self.weight_dim.\n            attr_preds (list[Tensor]): Attribute scores for each scale level\n                Has shape (N, num_points * num_attrs, H, W)\n            centernesses (list[Tensor]): Centerness for each scale level with\n                shape (N, num_points * 1, H, W)\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cfg (mmcv.Config, optional): Test / postprocessing configuration,\n                if None, test_cfg would be used. Defaults to None.\n            rescale (bool, optional): If True, return boxes in original image\n                space. 
Defaults to None.\n\n        Returns:\n            list[tuple[Tensor]]: Each item in result_list is a tuple, which\n                consists of predicted 3D boxes, scores, labels, attributes and\n                2D boxes (if necessary).\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \\\n            len(depth_cls_preds) == len(weights) == len(centernesses) == \\\n            len(attr_preds), 'The length of cls_scores, bbox_preds, ' \\\n            'dir_cls_preds, depth_cls_preds, weights, centernesses, and' \\\n            f'attr_preds: {len(cls_scores)}, {len(bbox_preds)}, ' \\\n            f'{len(dir_cls_preds)}, {len(depth_cls_preds)}, {len(weights)}' \\\n            f'{len(centernesses)}, {len(attr_preds)} are inconsistent.'\n        num_levels = len(cls_scores)\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,\n                                      bbox_preds[0].device)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            if self.use_direction_classifier:\n                dir_cls_pred_list = [\n                    dir_cls_preds[i][img_id].detach()\n                    for i in range(num_levels)\n                ]\n            else:\n                dir_cls_pred_list = [\n                    cls_scores[i][img_id].new_full(\n                        [2, *cls_scores[i][img_id].shape[1:]], 0).detach()\n                    for i in range(num_levels)\n                ]\n            if self.use_depth_classifier:\n                depth_cls_pred_list = [\n                    depth_cls_preds[i][img_id].detach()\n                    for i in range(num_levels)\n                ]\n            else:\n                depth_cls_pred_list = [\n                    cls_scores[i][img_id].new_full(\n                        [self.num_depth_cls, *cls_scores[i][img_id].shape[1:]],\n                        0).detach() for i in range(num_levels)\n                ]\n            if self.weight_dim != -1:\n                weight_list = [\n                    weights[i][img_id].detach() for i in range(num_levels)\n                ]\n            else:\n                weight_list = [\n                    cls_scores[i][img_id].new_full(\n                        [1, *cls_scores[i][img_id].shape[1:]], 0).detach()\n                    for i in range(num_levels)\n                ]\n            if self.pred_attrs:\n                attr_pred_list = [\n                    attr_preds[i][img_id].detach() for i in range(num_levels)\n                ]\n            else:\n                attr_pred_list = [\n                    cls_scores[i][img_id].new_full(\n                        [self.num_attrs, *cls_scores[i][img_id].shape[1:]],\n                        self.attr_background_label).detach()\n                    for i in range(num_levels)\n                ]\n            centerness_pred_list = [\n                centernesses[i][img_id].detach() for i in range(num_levels)\n            ]\n            input_meta = img_metas[img_id]\n            det_bboxes = self._get_bboxes_single(\n                cls_score_list, bbox_pred_list, dir_cls_pred_list,\n                depth_cls_pred_list, weight_list, attr_pred_list,\n   
             centerness_pred_list, mlvl_points, input_meta, cfg, rescale)\n            result_list.append(det_bboxes)\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_scores,\n                           bbox_preds,\n                           dir_cls_preds,\n                           depth_cls_preds,\n                           weights,\n                           attr_preds,\n                           centernesses,\n                           mlvl_points,\n                           input_meta,\n                           cfg,\n                           rescale=False):\n        \"\"\"Transform outputs for a single batch item into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for a single scale level\n                Has shape (num_points * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for a single scale\n                level with shape (num_points * bbox_code_size, H, W).\n            dir_cls_preds (list[Tensor]): Box scores for direction class\n                predictions on a single scale level with shape\n                (num_points * 2, H, W)\n            depth_cls_preds (list[Tensor]): Box scores for probabilistic depth\n                predictions on a single scale level with shape\n                (num_points * self.num_depth_cls, H, W)\n            weights (list[Tensor]): Location-aware weight maps on a single\n                scale level with shape (num_points * self.weight_dim, H, W).\n            attr_preds (list[Tensor]): Attribute scores for each scale level\n                Has shape (N, num_points * num_attrs, H, W)\n            centernesses (list[Tensor]): Centerness for a single scale level\n                with shape (num_points, H, W).\n            mlvl_points (list[Tensor]): Box reference for a single scale level\n                with shape (num_total_points, 2).\n            input_meta (dict): Metadata of input image.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool, optional): If True, return boxes in original image\n                space. 
Defaults to False.\n\n        Returns:\n            tuples[Tensor]: Predicted 3D boxes, scores, labels, attributes and\n                2D boxes (if necessary).\n        \"\"\"\n        view = np.array(input_meta['cam2img'])\n        scale_factor = input_meta['scale_factor']\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)\n        mlvl_centers2d = []\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_dir_scores = []\n        mlvl_attr_scores = []\n        mlvl_centerness = []\n        mlvl_depth_cls_scores = []\n        mlvl_depth_uncertainty = []\n        mlvl_bboxes2d = None\n        if self.pred_bbox2d:\n            mlvl_bboxes2d = []\n\n        for cls_score, bbox_pred, dir_cls_pred, depth_cls_pred, weight, \\\n                attr_pred, centerness, points in zip(\n                    cls_scores, bbox_preds, dir_cls_preds, depth_cls_preds,\n                    weights, attr_preds, centernesses, mlvl_points):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            scores = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels).sigmoid()\n            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n            depth_cls_pred = depth_cls_pred.permute(1, 2, 0).reshape(\n                -1, self.num_depth_cls)\n            depth_cls_score = F.softmax(\n                depth_cls_pred, dim=-1).topk(\n                    k=2, dim=-1)[0].mean(dim=-1)\n            if self.weight_dim != -1:\n                weight = weight.permute(1, 2, 0).reshape(-1, self.weight_dim)\n            else:\n                weight = weight.permute(1, 2, 0).reshape(-1, 1)\n            depth_uncertainty = torch.exp(-weight[:, -1])\n            attr_pred = attr_pred.permute(1, 2, 0).reshape(-1, self.num_attrs)\n            attr_score = torch.max(attr_pred, dim=-1)[1]\n            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()\n\n            bbox_pred = bbox_pred.permute(1, 2,\n                                          0).reshape(-1,\n                                                     sum(self.group_reg_dims))\n            bbox_pred3d = bbox_pred[:, :self.bbox_coder.bbox_code_size]\n            if self.pred_bbox2d:\n                bbox_pred2d = bbox_pred[:, -4:]\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                merged_scores = scores * centerness[:, None]\n                if self.use_depth_classifier:\n                    merged_scores *= depth_cls_score[:, None]\n                    if self.weight_dim != -1:\n                        merged_scores *= depth_uncertainty[:, None]\n                max_scores, _ = merged_scores.max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                points = points[topk_inds, :]\n                bbox_pred3d = bbox_pred3d[topk_inds, :]\n                scores = scores[topk_inds, :]\n                dir_cls_pred = dir_cls_pred[topk_inds, :]\n                depth_cls_pred = depth_cls_pred[topk_inds, :]\n                centerness = centerness[topk_inds]\n                dir_cls_score = dir_cls_score[topk_inds]\n                depth_cls_score = depth_cls_score[topk_inds]\n                depth_uncertainty = depth_uncertainty[topk_inds]\n                attr_score = attr_score[topk_inds]\n                if self.pred_bbox2d:\n                    bbox_pred2d = 
bbox_pred2d[topk_inds, :]\n            # change the offset to actual center predictions\n            bbox_pred3d[:, :2] = points - bbox_pred3d[:, :2]\n            if rescale:\n                bbox_pred3d[:, :2] /= bbox_pred3d[:, :2].new_tensor(\n                    scale_factor)\n                if self.pred_bbox2d:\n                    bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor)\n            if self.use_depth_classifier:\n                prob_depth_pred = self.bbox_coder.decode_prob_depth(\n                    depth_cls_pred, self.depth_range, self.depth_unit,\n                    self.division, self.num_depth_cls)\n                sig_alpha = torch.sigmoid(self.fuse_lambda)\n                bbox_pred3d[:, 2] = sig_alpha * bbox_pred3d[:, 2] + \\\n                    (1 - sig_alpha) * prob_depth_pred\n            pred_center2d = bbox_pred3d[:, :3].clone()\n            bbox_pred3d[:, :3] = points_img2cam(bbox_pred3d[:, :3], view)\n            mlvl_centers2d.append(pred_center2d)\n            mlvl_bboxes.append(bbox_pred3d)\n            mlvl_scores.append(scores)\n            mlvl_dir_scores.append(dir_cls_score)\n            mlvl_depth_cls_scores.append(depth_cls_score)\n            mlvl_attr_scores.append(attr_score)\n            mlvl_centerness.append(centerness)\n            mlvl_depth_uncertainty.append(depth_uncertainty)\n            if self.pred_bbox2d:\n                bbox_pred2d = distance2bbox(\n                    points, bbox_pred2d, max_shape=input_meta['img_shape'])\n                mlvl_bboxes2d.append(bbox_pred2d)\n\n        mlvl_centers2d = torch.cat(mlvl_centers2d)\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n        if self.pred_bbox2d:\n            mlvl_bboxes2d = torch.cat(mlvl_bboxes2d)\n\n        # change local yaw to global yaw for 3D nms\n        cam2img = torch.eye(\n            4, dtype=mlvl_centers2d.dtype, device=mlvl_centers2d.device)\n        cam2img[:view.shape[0], :view.shape[1]] = \\\n            mlvl_centers2d.new_tensor(view)\n        mlvl_bboxes = self.bbox_coder.decode_yaw(mlvl_bboxes, mlvl_centers2d,\n                                                 mlvl_dir_scores,\n                                                 self.dir_offset, cam2img)\n\n        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes,\n            box_dim=self.bbox_coder.bbox_code_size,\n            origin=(0.5, 0.5, 0.5)).bev)\n\n        mlvl_scores = torch.cat(mlvl_scores)\n        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0\n        # BG cat_id: num_class\n        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n        mlvl_attr_scores = torch.cat(mlvl_attr_scores)\n        mlvl_centerness = torch.cat(mlvl_centerness)\n        # no scale_factors in box3d_multiclass_nms\n        # Then we multiply it from outside\n        mlvl_nms_scores = mlvl_scores * mlvl_centerness[:, None]\n        if self.use_depth_classifier:  # multiply the depth confidence\n            mlvl_depth_cls_scores = torch.cat(mlvl_depth_cls_scores)\n            mlvl_nms_scores *= mlvl_depth_cls_scores[:, None]\n            if self.weight_dim != -1:\n                mlvl_depth_uncertainty = torch.cat(mlvl_depth_uncertainty)\n                mlvl_nms_scores *= mlvl_depth_uncertainty[:, None]\n        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                       mlvl_nms_scores, 
cfg.score_thr,\n                                       cfg.max_per_img, cfg, mlvl_dir_scores,\n                                       mlvl_attr_scores, mlvl_bboxes2d)\n        bboxes, scores, labels, dir_scores, attrs = results[0:5]\n        attrs = attrs.to(labels.dtype)  # change data type to int\n        bboxes = input_meta['box_type_3d'](\n            bboxes,\n            box_dim=self.bbox_coder.bbox_code_size,\n            origin=(0.5, 0.5, 0.5))\n        # Note that the predictions use origin (0.5, 0.5, 0.5)\n        # Due to the ground truth centers2d are the gravity center of objects\n        # v0.10.0 fix inplace operation to the input tensor of cam_box3d\n        # So here we also need to add origin=(0.5, 0.5, 0.5)\n        if not self.pred_attrs:\n            attrs = None\n\n        outputs = (bboxes, scores, labels, attrs)\n        if self.pred_bbox2d:\n            bboxes2d = results[-1]\n            bboxes2d = torch.cat([bboxes2d, scores[:, None]], dim=1)\n            outputs = outputs + (bboxes2d, )\n\n        return outputs\n\n    def get_targets(self, points, gt_bboxes_list, gt_labels_list,\n                    gt_bboxes_3d_list, gt_labels_3d_list, centers2d_list,\n                    depths_list, attr_labels_list):\n        \"\"\"Compute regression, classification and centerss targets for points\n        in multiple images.\n\n        Args:\n            points (list[Tensor]): Points of each fpn level, each has shape\n                (num_points, 2).\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n            gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each\n                image, each has shape (num_gt, bbox_code_size).\n            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each\n                box, each has shape (num_gt,).\n            centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,\n                each has shape (num_gt, 2).\n            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D\n                image, each has shape (num_gt, 1).\n            attr_labels_list (list[Tensor]): Attribute labels of each box,\n                each has shape (num_gt,).\n\n        Returns:\n            tuple:\n                concat_lvl_labels (list[Tensor]): Labels of each level. 
\\\n                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each \\\n                    level.\n        \"\"\"\n        assert len(points) == len(self.regress_ranges)\n        num_levels = len(points)\n        # expand regress ranges to align with points\n        expanded_regress_ranges = [\n            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(\n                points[i]) for i in range(num_levels)\n        ]\n        # concat all levels points and regress ranges\n        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)\n        concat_points = torch.cat(points, dim=0)\n\n        # the number of points per img, per lvl\n        num_points = [center.size(0) for center in points]\n\n        if attr_labels_list is None:\n            attr_labels_list = [\n                gt_labels.new_full(gt_labels.shape, self.attr_background_label)\n                for gt_labels in gt_labels_list\n            ]\n\n        # get labels and bbox_targets of each image\n        _, bbox_targets_list, labels_3d_list, bbox_targets_3d_list, \\\n            centerness_targets_list, attr_targets_list = multi_apply(\n                self._get_target_single,\n                gt_bboxes_list,\n                gt_labels_list,\n                gt_bboxes_3d_list,\n                gt_labels_3d_list,\n                centers2d_list,\n                depths_list,\n                attr_labels_list,\n                points=concat_points,\n                regress_ranges=concat_regress_ranges,\n                num_points_per_lvl=num_points)\n\n        # split to per img, per level\n        bbox_targets_list = [\n            bbox_targets.split(num_points, 0)\n            for bbox_targets in bbox_targets_list\n        ]\n        labels_3d_list = [\n            labels_3d.split(num_points, 0) for labels_3d in labels_3d_list\n        ]\n        bbox_targets_3d_list = [\n            bbox_targets_3d.split(num_points, 0)\n            for bbox_targets_3d in bbox_targets_3d_list\n        ]\n        centerness_targets_list = [\n            centerness_targets.split(num_points, 0)\n            for centerness_targets in centerness_targets_list\n        ]\n        attr_targets_list = [\n            attr_targets.split(num_points, 0)\n            for attr_targets in attr_targets_list\n        ]\n\n        # concat per level image\n        concat_lvl_labels_3d = []\n        concat_lvl_bbox_targets_3d = []\n        concat_lvl_centerness_targets = []\n        concat_lvl_attr_targets = []\n        for i in range(num_levels):\n            concat_lvl_labels_3d.append(\n                torch.cat([labels[i] for labels in labels_3d_list]))\n            concat_lvl_centerness_targets.append(\n                torch.cat([\n                    centerness_targets[i]\n                    for centerness_targets in centerness_targets_list\n                ]))\n            bbox_targets_3d = torch.cat([\n                bbox_targets_3d[i] for bbox_targets_3d in bbox_targets_3d_list\n            ])\n            if self.pred_bbox2d:\n                bbox_targets = torch.cat(\n                    [bbox_targets[i] for bbox_targets in bbox_targets_list])\n                bbox_targets_3d = torch.cat([bbox_targets_3d, bbox_targets],\n                                            dim=1)\n            concat_lvl_attr_targets.append(\n                torch.cat(\n                    [attr_targets[i] for attr_targets in attr_targets_list]))\n            if self.norm_on_bbox:\n                bbox_targets_3d[:, :2] = \\\n          
          bbox_targets_3d[:, :2] / self.strides[i]\n                if self.pred_bbox2d:\n                    bbox_targets_3d[:, -4:] = \\\n                        bbox_targets_3d[:, -4:] / self.strides[i]\n            concat_lvl_bbox_targets_3d.append(bbox_targets_3d)\n        return concat_lvl_labels_3d, concat_lvl_bbox_targets_3d, \\\n            concat_lvl_centerness_targets, concat_lvl_attr_targets\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/point_rpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch import nn as nn\n\nfrom mmdet3d.core import xywhr2xyxyr\nfrom mmdet3d.core.bbox.structures import (DepthInstance3DBoxes,\n                                          LiDARInstance3DBoxes)\nfrom mmdet3d.core.post_processing import nms_bev, nms_normal_bev\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom ..builder import HEADS, build_loss\n\n\n@HEADS.register_module()\nclass PointRPNHead(BaseModule):\n    \"\"\"RPN module for PointRCNN.\n\n    Args:\n        num_classes (int): Number of classes.\n        train_cfg (dict): Train configs.\n        test_cfg (dict): Test configs.\n        pred_layer_cfg (dict, optional): Config of classification and\n            regression prediction layers. Defaults to None.\n        enlarge_width (float, optional): Enlarge bbox for each side to ignore\n            close points. Defaults to 0.1.\n        cls_loss (dict, optional): Config of direction classification loss.\n            Defaults to None.\n        bbox_loss (dict, optional): Config of localization loss.\n            Defaults to None.\n        bbox_coder (dict, optional): Config dict of box coders.\n            Defaults to None.\n        init_cfg (dict, optional): Config of initialization. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 train_cfg,\n                 test_cfg,\n                 pred_layer_cfg=None,\n                 enlarge_width=0.1,\n                 cls_loss=None,\n                 bbox_loss=None,\n                 bbox_coder=None,\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.enlarge_width = enlarge_width\n\n        # build loss function\n        self.bbox_loss = build_loss(bbox_loss)\n        self.cls_loss = build_loss(cls_loss)\n\n        # build box coder\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n        # build pred conv\n        self.cls_layers = self._make_fc_layers(\n            fc_cfg=pred_layer_cfg.cls_linear_channels,\n            input_channels=pred_layer_cfg.in_channels,\n            output_channels=self._get_cls_out_channels())\n\n        self.reg_layers = self._make_fc_layers(\n            fc_cfg=pred_layer_cfg.reg_linear_channels,\n            input_channels=pred_layer_cfg.in_channels,\n            output_channels=self._get_reg_out_channels())\n\n    def _make_fc_layers(self, fc_cfg, input_channels, output_channels):\n        \"\"\"Make fully connect layers.\n\n        Args:\n            fc_cfg (dict): Config of fully connect.\n            input_channels (int): Input channels for fc_layers.\n            output_channels (int): Input channels for fc_layers.\n\n        Returns:\n            nn.Sequential: Fully connect layers.\n        \"\"\"\n        fc_layers = []\n        c_in = input_channels\n        for k in range(0, fc_cfg.__len__()):\n            fc_layers.extend([\n                nn.Linear(c_in, fc_cfg[k], bias=False),\n                nn.BatchNorm1d(fc_cfg[k]),\n                nn.ReLU(),\n            ])\n            c_in = fc_cfg[k]\n        fc_layers.append(nn.Linear(c_in, output_channels, bias=True))\n        return nn.Sequential(*fc_layers)\n\n    def _get_cls_out_channels(self):\n        \"\"\"Return the channel number of classification outputs.\"\"\"\n        # Class numbers (k) + 
objectness (1)\n        return self.num_classes\n\n    def _get_reg_out_channels(self):\n        \"\"\"Return the channel number of regression outputs.\"\"\"\n        # Bbox classification and regression\n        # (center residual (3), size regression (3)\n        # torch.cos(yaw) (1), torch.sin(yaw) (1)\n        return self.bbox_coder.code_size\n\n    def forward(self, feat_dict):\n        \"\"\"Forward pass.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            tuple[list[torch.Tensor]]: Predicted boxes and classification\n                scores.\n        \"\"\"\n        point_features = feat_dict['fp_features']\n        point_features = point_features.permute(0, 2, 1).contiguous()\n        batch_size = point_features.shape[0]\n        feat_cls = point_features.view(-1, point_features.shape[-1])\n        feat_reg = point_features.view(-1, point_features.shape[-1])\n\n        point_cls_preds = self.cls_layers(feat_cls).reshape(\n            batch_size, -1, self._get_cls_out_channels())\n        point_box_preds = self.reg_layers(feat_reg).reshape(\n            batch_size, -1, self._get_reg_out_channels())\n        return point_box_preds, point_cls_preds\n\n    @force_fp32(apply_to=('bbox_preds'))\n    def loss(self,\n             bbox_preds,\n             cls_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             img_metas=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of PointRCNN RPN_Head.\n            cls_preds (dict): Classification from forward of PointRCNN\n                RPN_Head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            img_metas (list[dict], Optional): Contain pcd and img's meta info.\n                Defaults to None.\n\n        Returns:\n            dict: Losses of PointRCNN RPN module.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d)\n        (bbox_targets, mask_targets, positive_mask, negative_mask,\n         box_loss_weights, point_targets) = targets\n\n        # bbox loss\n        bbox_loss = self.bbox_loss(bbox_preds, bbox_targets,\n                                   box_loss_weights.unsqueeze(-1))\n        # calculate semantic loss\n        semantic_points = cls_preds.reshape(-1, self.num_classes)\n        semantic_targets = mask_targets\n        semantic_targets[negative_mask] = self.num_classes\n        semantic_points_label = semantic_targets\n        # for ignore, but now we do not have ignored label\n        semantic_loss_weight = negative_mask.float() + positive_mask.float()\n        semantic_loss = self.cls_loss(semantic_points,\n                                      semantic_points_label.reshape(-1),\n                                      semantic_loss_weight.reshape(-1))\n        semantic_loss /= positive_mask.float().sum()\n        losses = dict(bbox_loss=bbox_loss, semantic_loss=semantic_loss)\n\n        return losses\n\n    def get_targets(self, points, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate targets of PointRCNN RPN head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each batch.\n            gt_labels_3d 
(list[torch.Tensor]): Labels of each batch.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of PointRCNN RPN head.\n        \"\"\"\n        # find empty example\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n\n        (bbox_targets, mask_targets, positive_mask, negative_mask,\n         point_targets) = multi_apply(self.get_targets_single, points,\n                                      gt_bboxes_3d, gt_labels_3d)\n\n        bbox_targets = torch.stack(bbox_targets)\n        mask_targets = torch.stack(mask_targets)\n        positive_mask = torch.stack(positive_mask)\n        negative_mask = torch.stack(negative_mask)\n        box_loss_weights = positive_mask / (positive_mask.sum() + 1e-6)\n\n        return (bbox_targets, mask_targets, positive_mask, negative_mask,\n                box_loss_weights, point_targets)\n\n    def get_targets_single(self, points, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate targets of PointRCNN RPN head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of ssd3d head.\n        \"\"\"\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n\n        valid_gt = gt_labels_3d != -1\n        gt_bboxes_3d = gt_bboxes_3d[valid_gt]\n        gt_labels_3d = gt_labels_3d[valid_gt]\n\n        # transform the bbox coordinate to the point cloud coordinate\n        gt_bboxes_3d_tensor = gt_bboxes_3d.tensor.clone()\n        gt_bboxes_3d_tensor[..., 2] += gt_bboxes_3d_tensor[..., 5] / 2\n\n        points_mask, assignment = self._assign_targets_by_points_inside(\n            gt_bboxes_3d, points)\n        gt_bboxes_3d_tensor = gt_bboxes_3d_tensor[assignment]\n        mask_targets = gt_labels_3d[assignment]\n\n        bbox_targets = self.bbox_coder.encode(gt_bboxes_3d_tensor,\n                                              points[..., 0:3], mask_targets)\n\n        positive_mask = (points_mask.max(1)[0] > 0)\n        # add ignore_mask\n        extend_gt_bboxes_3d = gt_bboxes_3d.enlarged_box(self.enlarge_width)\n        points_mask, _ = self._assign_targets_by_points_inside(\n            extend_gt_bboxes_3d, points)\n        negative_mask = (points_mask.max(1)[0] == 0)\n\n        point_targets = points[..., 0:3]\n        return (bbox_targets, mask_targets, positive_mask, negative_mask,\n                point_targets)\n\n    def get_bboxes(self,\n                   points,\n                   bbox_preds,\n                   cls_preds,\n                   input_metas,\n                   rescale=False):\n        \"\"\"Generate bboxes from RPN head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Regression predictions from PointRCNN head.\n            cls_preds (dict): Class scores predictions from PointRCNN head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool, optional): Whether to rescale bboxes.\n                Defaults to False.\n\n        Returns:\n         
   list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        sem_scores = cls_preds.sigmoid()\n        obj_scores = sem_scores.max(-1)[0]\n        object_class = sem_scores.argmax(dim=-1)\n\n        batch_size = sem_scores.shape[0]\n        results = list()\n        for b in range(batch_size):\n            bbox3d = self.bbox_coder.decode(bbox_preds[b], points[b, ..., :3],\n                                            object_class[b])\n            bbox_selected, score_selected, labels, cls_preds_selected = \\\n                self.class_agnostic_nms(obj_scores[b], sem_scores[b], bbox3d,\n                                        points[b, ..., :3], input_metas[b])\n            bbox = input_metas[b]['box_type_3d'](\n                bbox_selected.clone(),\n                box_dim=bbox_selected.shape[-1],\n                with_yaw=True)\n            results.append((bbox, score_selected, labels, cls_preds_selected))\n        return results\n\n    def class_agnostic_nms(self, obj_scores, sem_scores, bbox, points,\n                           input_meta):\n        \"\"\"Class agnostic nms.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): Semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        nms_cfg = self.test_cfg.nms_cfg if not self.training \\\n            else self.train_cfg.nms_cfg\n        if nms_cfg.use_rotate_nms:\n            nms_func = nms_bev\n        else:\n            nms_func = nms_normal_bev\n\n        num_bbox = bbox.shape[0]\n        bbox = input_meta['box_type_3d'](\n            bbox.clone(),\n            box_dim=bbox.shape[-1],\n            with_yaw=True,\n            origin=(0.5, 0.5, 0.5))\n\n        if isinstance(bbox, LiDARInstance3DBoxes):\n            box_idx = bbox.points_in_boxes(points)\n            box_indices = box_idx.new_zeros([num_bbox + 1])\n            box_idx[box_idx == -1] = num_bbox\n            box_indices.scatter_add_(0, box_idx.long(),\n                                     box_idx.new_ones(box_idx.shape))\n            box_indices = box_indices[:-1]\n            nonempty_box_mask = box_indices >= 0\n        elif isinstance(bbox, DepthInstance3DBoxes):\n            box_indices = bbox.points_in_boxes(points)\n            nonempty_box_mask = box_indices.T.sum(1) >= 0\n        else:\n            raise NotImplementedError('Unsupported bbox type!')\n\n        bbox = bbox[nonempty_box_mask]\n\n        if self.test_cfg.score_thr is not None:\n            score_thr = self.test_cfg.score_thr\n            keep = (obj_scores >= score_thr)\n            obj_scores = obj_scores[keep]\n            sem_scores = sem_scores[keep]\n            bbox = bbox.tensor[keep]\n\n        if obj_scores.shape[0] > 0:\n            topk = min(nms_cfg.nms_pre, obj_scores.shape[0])\n            obj_scores_nms, indices = torch.topk(obj_scores, k=topk)\n            bbox_for_nms = xywhr2xyxyr(bbox[indices].bev)\n            sem_scores_nms = sem_scores[indices]\n\n            keep = nms_func(bbox_for_nms, obj_scores_nms, nms_cfg.iou_thr)\n            keep = keep[:nms_cfg.nms_post]\n\n            bbox_selected = bbox.tensor[indices][keep]\n            score_selected = obj_scores_nms[keep]\n            cls_preds = sem_scores_nms[keep]\n            labels = torch.argmax(cls_preds, -1)\n        else:\n            bbox_selected = 
bbox.tensor\n            score_selected = obj_scores.new_zeros([0])\n            labels = obj_scores.new_zeros([0])\n            cls_preds = obj_scores.new_zeros([0, sem_scores.shape[-1]])\n\n        return bbox_selected, score_selected, labels, cls_preds\n\n    def _assign_targets_by_points_inside(self, bboxes_3d, points):\n        \"\"\"Compute assignment by checking whether point is inside bbox.\n\n        Args:\n            bboxes_3d (:obj:`BaseInstance3DBoxes`): Instance of bounding boxes.\n            points (torch.Tensor): Points of a batch.\n\n        Returns:\n            tuple[torch.Tensor]: Flags indicating whether each point is\n                inside bbox and the index of box where each point are in.\n        \"\"\"\n        # TODO: align points_in_boxes function in each box_structures\n        num_bbox = bboxes_3d.tensor.shape[0]\n        if isinstance(bboxes_3d, LiDARInstance3DBoxes):\n            assignment = bboxes_3d.points_in_boxes(points[:, 0:3]).long()\n            points_mask = assignment.new_zeros(\n                [assignment.shape[0], num_bbox + 1])\n            assignment[assignment == -1] = num_bbox\n            points_mask.scatter_(1, assignment.unsqueeze(1), 1)\n            points_mask = points_mask[:, :-1]\n            assignment[assignment == num_bbox] = num_bbox - 1\n        elif isinstance(bboxes_3d, DepthInstance3DBoxes):\n            points_mask = bboxes_3d.points_in_boxes(points)\n            assignment = points_mask.argmax(dim=-1)\n        else:\n            raise NotImplementedError('Unsupported bbox type!')\n\n        return points_mask, assignment\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/shape_aware_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr\nfrom mmdet.core import multi_apply\nfrom ..builder import HEADS, build_head\nfrom .anchor3d_head import Anchor3DHead\n\n\n@HEADS.register_module()\nclass BaseShapeHead(BaseModule):\n    \"\"\"Base Shape-aware Head in Shape Signature Network.\n\n    Note:\n        This base shape-aware grouping head uses default settings for small\n        objects. For large and huge objects, it is recommended to use\n        heavier heads, like (64, 64, 64) and (128, 128, 64, 64, 64) in\n        shared conv channels, (2, 1, 1) and (2, 1, 2, 1, 1) in shared\n        conv strides. For tiny objects, we can use smaller heads, like\n        (32, 32) channels and (1, 1) strides.\n\n    Args:\n        num_cls (int): Number of classes.\n        num_base_anchors (int): Number of anchors per location.\n        box_code_size (int): The dimension of boxes to be encoded.\n        in_channels (int): Input channels for convolutional layers.\n        shared_conv_channels (tuple, optional): Channels for shared\n            convolutional layers. Default: (64, 64).\n        shared_conv_strides (tuple, optional): Strides for shared\n            convolutional layers. Default: (1, 1).\n        use_direction_classifier (bool, optional): Whether to use direction\n            classifier. Default: True.\n        conv_cfg (dict, optional): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict, optional): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (bool | str, optional): Type of bias. 
Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 num_cls,\n                 num_base_anchors,\n                 box_code_size,\n                 in_channels,\n                 shared_conv_channels=(64, 64),\n                 shared_conv_strides=(1, 1),\n                 use_direction_classifier=True,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias=False,\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.num_cls = num_cls\n        self.num_base_anchors = num_base_anchors\n        self.use_direction_classifier = use_direction_classifier\n        self.box_code_size = box_code_size\n\n        assert len(shared_conv_channels) == len(shared_conv_strides), \\\n            'Lengths of channels and strides list should be equal.'\n\n        self.shared_conv_channels = [in_channels] + list(shared_conv_channels)\n        self.shared_conv_strides = list(shared_conv_strides)\n\n        shared_conv = []\n        for i in range(len(self.shared_conv_strides)):\n            shared_conv.append(\n                ConvModule(\n                    self.shared_conv_channels[i],\n                    self.shared_conv_channels[i + 1],\n                    kernel_size=3,\n                    stride=self.shared_conv_strides[i],\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    bias=bias,\n                    norm_cfg=norm_cfg))\n\n        self.shared_conv = nn.Sequential(*shared_conv)\n\n        out_channels = self.shared_conv_channels[-1]\n        self.conv_cls = nn.Conv2d(out_channels, num_base_anchors * num_cls, 1)\n        self.conv_reg = nn.Conv2d(out_channels,\n                                  num_base_anchors * box_code_size, 1)\n\n        if use_direction_classifier:\n            self.conv_dir_cls = nn.Conv2d(out_channels, num_base_anchors * 2,\n                                          1)\n        if init_cfg is None:\n            if use_direction_classifier:\n                self.init_cfg = dict(\n                    type='Kaiming',\n                    layer='Conv2d',\n                    override=[\n                        dict(type='Normal', name='conv_reg', std=0.01),\n                        dict(\n                            type='Normal',\n                            name='conv_cls',\n                            std=0.01,\n                            bias_prob=0.01),\n                        dict(\n                            type='Normal',\n                            name='conv_dir_cls',\n                            std=0.01,\n                            bias_prob=0.01)\n                    ])\n            else:\n                self.init_cfg = dict(\n                    type='Kaiming',\n                    layer='Conv2d',\n                    override=[\n                        dict(type='Normal', name='conv_reg', std=0.01),\n                        dict(\n                            type='Normal',\n                            name='conv_cls',\n                            std=0.01,\n                            bias_prob=0.01)\n                    ])\n\n    def forward(self, x):\n        \"\"\"Forward function for SmallHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, C, H, W].\n\n        Returns:\n            dict[torch.Tensor]: Contain score of each class, bbox\n                regression and direction classification predictions.\n                Note that all 
the returned tensors are reshaped as\n                [bs*num_base_anchors*H*W, num_cls/box_code_size/dir_bins].\n                It is more convenient to concat anchors for different\n                classes even though they have different feature map sizes.\n        \"\"\"\n        x = self.shared_conv(x)\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        featmap_size = bbox_pred.shape[-2:]\n        H, W = featmap_size\n        B = bbox_pred.shape[0]\n        cls_score = cls_score.view(-1, self.num_base_anchors, self.num_cls, H,\n                                   W).permute(0, 1, 3, 4,\n                                              2).reshape(B, -1, self.num_cls)\n        bbox_pred = bbox_pred.view(-1, self.num_base_anchors,\n                                   self.box_code_size, H, W).permute(\n                                       0, 1, 3, 4,\n                                       2).reshape(B, -1, self.box_code_size)\n\n        dir_cls_preds = None\n        if self.use_direction_classifier:\n            dir_cls_preds = self.conv_dir_cls(x)\n            dir_cls_preds = dir_cls_preds.view(-1, self.num_base_anchors, 2, H,\n                                               W).permute(0, 1, 3, 4,\n                                                          2).reshape(B, -1, 2)\n        ret = dict(\n            cls_score=cls_score,\n            bbox_pred=bbox_pred,\n            dir_cls_preds=dir_cls_preds,\n            featmap_size=featmap_size)\n        return ret\n\n\n@HEADS.register_module()\nclass ShapeAwareHead(Anchor3DHead):\n    \"\"\"Shape-aware grouping head for SSN.\n\n    Args:\n        tasks (dict): Shape-aware groups of multi-class objects.\n        assign_per_class (bool, optional): Whether to do assignment for each\n            class. 
Default: True.\n        kwargs (dict): Other arguments are the same as those in\n            :class:`Anchor3DHead`.\n    \"\"\"\n\n    def __init__(self, tasks, assign_per_class=True, init_cfg=None, **kwargs):\n        self.tasks = tasks\n        self.featmap_sizes = []\n        super().__init__(\n            assign_per_class=assign_per_class, init_cfg=init_cfg, **kwargs)\n\n    def init_weights(self):\n        if not self._is_init:\n            for m in self.heads:\n                if hasattr(m, 'init_weights'):\n                    m.init_weights()\n            self._is_init = True\n        else:\n            warnings.warn(f'init_weights of {self.__class__.__name__} has '\n                          f'been called more than once.')\n\n    def _init_layers(self):\n        \"\"\"Initialize neural network layers of the head.\"\"\"\n        self.heads = nn.ModuleList()\n        cls_ptr = 0\n        for task in self.tasks:\n            sizes = self.anchor_generator.sizes[cls_ptr:cls_ptr +\n                                                task['num_class']]\n            num_size = torch.tensor(sizes).reshape(-1, 3).size(0)\n            num_rot = len(self.anchor_generator.rotations)\n            num_base_anchors = num_rot * num_size\n            branch = dict(\n                type='BaseShapeHead',\n                num_cls=self.num_classes,\n                num_base_anchors=num_base_anchors,\n                box_code_size=self.box_code_size,\n                in_channels=self.in_channels,\n                shared_conv_channels=task['shared_conv_channels'],\n                shared_conv_strides=task['shared_conv_strides'])\n            self.heads.append(build_head(branch))\n            cls_ptr += task['num_class']\n\n    def forward_single(self, x):\n        \"\"\"Forward function on a single-scale feature map.\n\n        Args:\n            x (torch.Tensor): Input features.\n        Returns:\n            tuple[torch.Tensor]: Contain score of each class, bbox\n                regression and direction classification predictions.\n        \"\"\"\n        results = []\n\n        for head in self.heads:\n            results.append(head(x))\n\n        cls_score = torch.cat([result['cls_score'] for result in results],\n                              dim=1)\n        bbox_pred = torch.cat([result['bbox_pred'] for result in results],\n                              dim=1)\n        dir_cls_preds = None\n        if self.use_direction_classifier:\n            dir_cls_preds = torch.cat(\n                [result['dir_cls_preds'] for result in results], dim=1)\n\n        self.featmap_sizes = []\n        for i, task in enumerate(self.tasks):\n            for _ in range(task['num_class']):\n                self.featmap_sizes.append(results[i]['featmap_size'])\n        assert len(self.featmap_sizes) == len(self.anchor_generator.ranges), \\\n            'Length of feature map sizes must be equal to length of ' + \\\n            'different ranges of anchor generator.'\n\n        return cls_score, bbox_pred, dir_cls_preds\n\n    def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,\n                    label_weights, bbox_targets, bbox_weights, dir_targets,\n                    dir_weights, num_total_samples):\n        \"\"\"Calculate loss of Single-level results.\n\n        Args:\n            cls_score (torch.Tensor): Class score in single-level.\n            bbox_pred (torch.Tensor): Bbox prediction in single-level.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in 
single-level.\n            labels (torch.Tensor): Labels of class.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_targets (torch.Tensor): Targets of bbox predictions.\n            bbox_weights (torch.Tensor): Weights of bbox loss.\n            dir_targets (torch.Tensor): Targets of direction predictions.\n            dir_weights (torch.Tensor): Weights of direction loss.\n            num_total_samples (int): The number of valid samples.\n\n        Returns:\n            tuple[torch.Tensor]: Losses of class, bbox\n                and direction, respectively.\n        \"\"\"\n        # classification loss\n        if num_total_samples is None:\n            num_total_samples = int(cls_score.shape[0])\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.reshape(-1, self.num_classes)\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n\n        # regression loss\n        bbox_targets = bbox_targets.reshape(-1, self.box_code_size)\n        bbox_weights = bbox_weights.reshape(-1, self.box_code_size)\n        code_weight = self.train_cfg.get('code_weight', None)\n\n        if code_weight:\n            bbox_weights = bbox_weights * bbox_weights.new_tensor(code_weight)\n        bbox_pred = bbox_pred.reshape(-1, self.box_code_size)\n        if self.diff_rad_by_sin:\n            bbox_pred, bbox_targets = self.add_sin_difference(\n                bbox_pred, bbox_targets)\n        loss_bbox = self.loss_bbox(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            avg_factor=num_total_samples)\n\n        # direction classification loss\n        loss_dir = None\n        if self.use_direction_classifier:\n            dir_cls_preds = dir_cls_preds.reshape(-1, 2)\n            dir_targets = dir_targets.reshape(-1)\n            dir_weights = dir_weights.reshape(-1)\n            loss_dir = self.loss_dir(\n                dir_cls_preds,\n                dir_targets,\n                dir_weights,\n                avg_factor=num_total_samples)\n\n        return loss_cls, loss_bbox, loss_dir\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate losses.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes\n                of each sample.\n            gt_labels (list[torch.Tensor]): Gt labels of each sample.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Classification, bbox, and\n                direction losses of each level.\n\n                - loss_cls (list[torch.Tensor]): Classification losses.\n                - loss_bbox (list[torch.Tensor]): Box regression losses.\n                - loss_dir (list[torch.Tensor]): Direction classification\n                    losses.\n        \"\"\"\n        device = cls_scores[0].device\n        anchor_list = self.get_anchors(\n      
      self.featmap_sizes, input_metas, device=device)\n        cls_reg_targets = self.anchor_target_3d(\n            anchor_list,\n            gt_bboxes,\n            input_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            num_classes=self.num_classes,\n            sampling=self.sampling)\n\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         dir_targets_list, dir_weights_list, num_total_pos,\n         num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # num_total_samples = None\n        losses_cls, losses_bbox, losses_dir = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            dir_cls_preds,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            dir_targets_list,\n            dir_weights_list,\n            num_total_samples=num_total_samples)\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dir=losses_dir)\n\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   input_metas,\n                   cfg=None,\n                   rescale=False):\n        \"\"\"Get bboxes of anchor head.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            cfg (:obj:`ConfigDict`, optional): Training or testing config.\n                Default: None.\n            rescale (bool, optional): Whether to rescale bbox.\n                Default: False.\n\n        Returns:\n            list[tuple]: Prediction results of batches.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n        assert len(cls_scores) == len(dir_cls_preds)\n        num_levels = len(cls_scores)\n        assert num_levels == 1, 'Only support single level inference.'\n        device = cls_scores[0].device\n        mlvl_anchors = self.anchor_generator.grid_anchors(\n            self.featmap_sizes, device=device)\n        # `anchor` is a list of anchors for different classes\n        mlvl_anchors = [torch.cat(anchor, dim=0) for anchor in mlvl_anchors]\n\n        result_list = []\n        for img_id in range(len(input_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            dir_cls_pred_list = [\n                dir_cls_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n\n            input_meta = input_metas[img_id]\n            proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,\n                                               dir_cls_pred_list, mlvl_anchors,\n                                               input_meta, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def get_bboxes_single(self,\n                          
cls_scores,\n                          bbox_preds,\n                          dir_cls_preds,\n                          mlvl_anchors,\n                          input_meta,\n                          cfg=None,\n                          rescale=False):\n        \"\"\"Get bboxes of single branch.\n\n        Args:\n            cls_scores (torch.Tensor): Class score in single batch.\n            bbox_preds (torch.Tensor): Bbox prediction in single batch.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single batch.\n            mlvl_anchors (List[torch.Tensor]): Multi-level anchors\n                in single batch.\n            input_meta (list[dict]): Contain pcd and img's meta info.\n            cfg (:obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor], optional): whether to rescale bbox.\n                Default: False.\n\n        Returns:\n            tuple: Contain predictions of single batch.\n\n                - bboxes (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores (torch.Tensor): Class score of each bbox.\n                - labels (torch.Tensor): Label of each bbox.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_dir_scores = []\n        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(\n                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):\n            assert cls_score.size()[-2] == bbox_pred.size()[-2]\n            assert cls_score.size()[-2] == dir_cls_pred.size()[-2]\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    max_scores, _ = scores[:, :-1].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                dir_cls_score = dir_cls_score[topk_inds]\n\n            bboxes = self.bbox_coder.decode(anchors, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_dir_scores.append(dir_cls_score)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.box_code_size).bev)\n        mlvl_scores = torch.cat(mlvl_scores)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n\n        if self.use_sigmoid_cls:\n            # Add a dummy background class to the front when using sigmoid\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n\n        score_thr = cfg.get('score_thr', 0)\n        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                       mlvl_scores, score_thr, cfg.max_num,\n                                       cfg, mlvl_dir_scores)\n        bboxes, scores, labels, dir_scores = results\n        if bboxes.shape[0] > 0:\n            
dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,\n                                   self.dir_limit_offset, np.pi)\n            bboxes[..., 6] = (\n                dir_rot + self.dir_offset +\n                np.pi * dir_scores.to(bboxes.dtype))\n        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)\n        return bboxes, scores, labels\n"
  },
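  {
    "path": "examples/sketch_shape_aware_head_reshape.py",
    "content": "\"\"\"Standalone illustrative sketch: the file name and the helper below are\nhypothetical and are not referenced anywhere in this repository. It replays,\non dummy tensors, the reshape that ``BaseShapeHead.forward`` applies to its\nconv outputs: a (B, num_base_anchors * C, H, W) prediction map is rearranged\nto (B, num_base_anchors * H * W, C), so per-anchor predictions from branches\nwith different feature-map sizes can be concatenated along dim 1.\n\"\"\"\nimport torch\n\n\ndef flatten_anchor_map(x, num_base_anchors, channels_per_anchor):\n    \"\"\"Reshape (B, A*C, H, W) -> (B, A*H*W, C), mirroring the head's layout.\"\"\"\n    B, _, H, W = x.shape\n    x = x.view(B, num_base_anchors, channels_per_anchor, H, W)\n    # move the per-anchor channel dim last, then flatten anchors x H x W\n    return x.permute(0, 1, 3, 4, 2).reshape(B, -1, channels_per_anchor)\n\n\nif __name__ == '__main__':\n    # e.g. 2 base anchors and 3 classes on a 4x4 feature map\n    cls_map = torch.randn(1, 2 * 3, 4, 4)\n    flat = flatten_anchor_map(cls_map, num_base_anchors=2, channels_per_anchor=3)\n    assert flat.shape == (1, 2 * 4 * 4, 3)\n"
  },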
  {
    "path": "mmdet3d/models/dense_heads/smoke_mono3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch.nn import functional as F\n\nfrom mmdet.core import multi_apply\nfrom mmdet.core.bbox.builder import build_bbox_coder\nfrom mmdet.models.utils import gaussian_radius, gen_gaussian_target\nfrom mmdet.models.utils.gaussian_target import (get_local_maximum,\n                                                get_topk_from_heatmap,\n                                                transpose_and_gather_feat)\nfrom ..builder import HEADS\nfrom .anchor_free_mono3d_head import AnchorFreeMono3DHead\n\n\n@HEADS.register_module()\nclass SMOKEMono3DHead(AnchorFreeMono3DHead):\n    r\"\"\"Anchor-free head used in `SMOKE <https://arxiv.org/abs/2002.10111>`_\n\n    .. code-block:: none\n\n                /-----> 3*3 conv -----> 1*1 conv -----> cls\n        feature\n                \\-----> 3*3 conv -----> 1*1 conv -----> reg\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        dim_channel (list[int]): indices of dimension offset preds in\n            regression heatmap channels.\n        ori_channel (list[int]): indices of orientation offset pred in\n            regression heatmap channels.\n        bbox_coder (:obj:`CameraInstance3DBoxes`): Bbox coder\n            for encoding and decoding boxes.\n        loss_cls (dict, optional): Config of classification loss.\n            Default: loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0).\n        loss_bbox (dict, optional): Config of localization loss.\n            Default: loss_bbox=dict(type='L1Loss', loss_weight=10.0).\n        loss_dir (dict, optional): Config of direction classification loss.\n            In SMOKE, Default: None.\n        loss_attr (dict, optional): Config of attribute classification loss.\n            In SMOKE, Default: None.\n        loss_centerness (dict): Config of centerness loss.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).\n        init_cfg (dict): Initialization config dict. 
Default: None.\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 dim_channel,\n                 ori_channel,\n                 bbox_coder,\n                 loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=0.1),\n                 loss_dir=None,\n                 loss_attr=None,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 init_cfg=None,\n                 **kwargs):\n        super().__init__(\n            num_classes,\n            in_channels,\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            loss_dir=loss_dir,\n            loss_attr=loss_attr,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            **kwargs)\n        self.dim_channel = dim_channel\n        self.ori_channel = ori_channel\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple:\n                cls_scores (list[Tensor]): Box scores for each scale level,\n                    each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * bbox_code_size.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def forward_single(self, x):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): Input feature map.\n\n        Returns:\n            tuple: Scores for each class, bbox of input feature maps.\n        \"\"\"\n        cls_score, bbox_pred, dir_cls_pred, attr_pred, cls_feat, reg_feat = \\\n            super().forward_single(x)\n        cls_score = cls_score.sigmoid()  # turn to 0-1\n        cls_score = cls_score.clamp(min=1e-4, max=1 - 1e-4)\n        # (N, C, H, W)\n        offset_dims = bbox_pred[:, self.dim_channel, ...]\n        bbox_pred[:, self.dim_channel, ...] = offset_dims.sigmoid() - 0.5\n        # (N, C, H, W)\n        vector_ori = bbox_pred[:, self.ori_channel, ...]\n        bbox_pred[:, self.ori_channel, ...] 
= F.normalize(vector_ori)\n        return cls_score, bbox_pred\n\n    def get_bboxes(self, cls_scores, bbox_preds, img_metas, rescale=None):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level.\n            bbox_preds (list[Tensor]): Box regression for each scale.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n\n        Returns:\n            list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:\n                Each item in result_list is a 4-tuple.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == 1\n        cam2imgs = torch.stack([\n            cls_scores[0].new_tensor(img_meta['cam2img'])\n            for img_meta in img_metas\n        ])\n        trans_mats = torch.stack([\n            cls_scores[0].new_tensor(img_meta['trans_mat'])\n            for img_meta in img_metas\n        ])\n        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(\n            cls_scores[0],\n            bbox_preds[0],\n            img_metas,\n            cam2imgs=cam2imgs,\n            trans_mats=trans_mats,\n            topk=100,\n            kernel=3)\n\n        result_list = []\n        for img_id in range(len(img_metas)):\n\n            bboxes = batch_bboxes[img_id]\n            scores = batch_scores[img_id]\n            labels = batch_topk_labels[img_id]\n\n            keep_idx = scores > 0.25\n            bboxes = bboxes[keep_idx]\n            scores = scores[keep_idx]\n            labels = labels[keep_idx]\n\n            bboxes = img_metas[img_id]['box_type_3d'](\n                bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))\n            attrs = None\n            result_list.append((bboxes, scores, labels, attrs))\n\n        return result_list\n\n    def decode_heatmap(self,\n                       cls_score,\n                       reg_pred,\n                       img_metas,\n                       cam2imgs,\n                       trans_mats,\n                       topk=100,\n                       kernel=3):\n        \"\"\"Transform network outputs into raw bbox predictions.\n\n        Args:\n            cls_score (Tensor): Center prediction heatmap,\n                shape (B, num_classes, H, W).\n            reg_pred (Tensor): Box regression map.\n                shape (B, channel, H, W).\n            img_metas (List[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cam2imgs (Tensor): Camera intrinsic matrices.\n                shape (B, 4, 4)\n            trans_mats (Tensor): Transformation matrix from original image\n                to feature map.\n                shape: (batch, 3, 3)\n            topk (int): Get top k center keypoints from heatmap. 
Default 100.\n            kernel (int): Max pooling kernel for extract local maximum pixels.\n               Default 3.\n\n        Returns:\n            tuple[torch.Tensor]: Decoded output of SMOKEHead, containing\n               the following Tensors:\n              - batch_bboxes (Tensor): Coords of each 3D box.\n                    shape (B, k, 7)\n              - batch_scores (Tensor): Scores of each 3D box.\n                    shape (B, k)\n              - batch_topk_labels (Tensor): Categories of each 3D box.\n                    shape (B, k)\n        \"\"\"\n        img_h, img_w = img_metas[0]['pad_shape'][:2]\n        bs, _, feat_h, feat_w = cls_score.shape\n\n        center_heatmap_pred = get_local_maximum(cls_score, kernel=kernel)\n\n        *batch_dets, topk_ys, topk_xs = get_topk_from_heatmap(\n            center_heatmap_pred, k=topk)\n        batch_scores, batch_index, batch_topk_labels = batch_dets\n\n        regression = transpose_and_gather_feat(reg_pred, batch_index)\n        regression = regression.view(-1, 8)\n\n        points = torch.cat([topk_xs.view(-1, 1),\n                            topk_ys.view(-1, 1).float()],\n                           dim=1)\n        locations, dimensions, orientations = self.bbox_coder.decode(\n            regression, points, batch_topk_labels, cam2imgs, trans_mats)\n\n        batch_bboxes = torch.cat((locations, dimensions, orientations), dim=1)\n        batch_bboxes = batch_bboxes.view(bs, -1, self.bbox_code_size)\n        return batch_bboxes, batch_scores, batch_topk_labels\n\n    def get_predictions(self, labels3d, centers2d, gt_locations, gt_dimensions,\n                        gt_orientations, indices, img_metas, pred_reg):\n        \"\"\"Prepare predictions for computing loss.\n\n        Args:\n            labels3d (Tensor): Labels of each 3D box.\n                shape (B, max_objs, )\n            centers2d (Tensor): Coords of each projected 3D box\n                center on image. 
shape (B * max_objs, 2)\n            gt_locations (Tensor): Coords of each 3D box's location.\n                shape (B * max_objs, 3)\n            gt_dimensions (Tensor): Dimensions of each 3D box.\n                shape (N, 3)\n            gt_orientations (Tensor): Orientation(yaw) of each 3D box.\n                shape (N, 1)\n            indices (Tensor): Indices of the existence of the 3D box.\n                shape (B * max_objs, )\n            img_metas (list[dict]): Meta information of each image,\n                e.g., image size, scaling factor, etc.\n            pre_reg (Tensor): Box regression map.\n                shape (B, channel, H , W).\n\n        Returns:\n            dict: the dict has components below:\n            - bbox3d_yaws (:obj:`CameraInstance3DBoxes`):\n                bbox calculated using pred orientations.\n            - bbox3d_dims (:obj:`CameraInstance3DBoxes`):\n                bbox calculated using pred dimensions.\n            - bbox3d_locs (:obj:`CameraInstance3DBoxes`):\n                bbox calculated using pred locations.\n        \"\"\"\n        batch, channel = pred_reg.shape[0], pred_reg.shape[1]\n        w = pred_reg.shape[3]\n        cam2imgs = torch.stack([\n            gt_locations.new_tensor(img_meta['cam2img'])\n            for img_meta in img_metas\n        ])\n        trans_mats = torch.stack([\n            gt_locations.new_tensor(img_meta['trans_mat'])\n            for img_meta in img_metas\n        ])\n        centers2d_inds = centers2d[:, 1] * w + centers2d[:, 0]\n        centers2d_inds = centers2d_inds.view(batch, -1)\n        pred_regression = transpose_and_gather_feat(pred_reg, centers2d_inds)\n        pred_regression_pois = pred_regression.view(-1, channel)\n        locations, dimensions, orientations = self.bbox_coder.decode(\n            pred_regression_pois, centers2d, labels3d, cam2imgs, trans_mats,\n            gt_locations)\n\n        locations, dimensions, orientations = locations[indices], dimensions[\n            indices], orientations[indices]\n\n        locations[:, 1] += dimensions[:, 1] / 2\n\n        gt_locations = gt_locations[indices]\n\n        assert len(locations) == len(gt_locations)\n        assert len(dimensions) == len(gt_dimensions)\n        assert len(orientations) == len(gt_orientations)\n        bbox3d_yaws = self.bbox_coder.encode(gt_locations, gt_dimensions,\n                                             orientations, img_metas)\n        bbox3d_dims = self.bbox_coder.encode(gt_locations, dimensions,\n                                             gt_orientations, img_metas)\n        bbox3d_locs = self.bbox_coder.encode(locations, gt_dimensions,\n                                             gt_orientations, img_metas)\n\n        pred_bboxes = dict(ori=bbox3d_yaws, dim=bbox3d_dims, loc=bbox3d_locs)\n\n        return pred_bboxes\n\n    def get_targets(self, gt_bboxes, gt_labels, gt_bboxes_3d, gt_labels_3d,\n                    centers2d, feat_shape, img_shape, img_metas):\n        \"\"\"Get training targets for batch images.\n\n        Args:\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image,\n                shape (num_gt, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each box,\n                shape (num_gt,).\n            gt_bboxes_3d (list[:obj:`CameraInstance3DBoxes`]): 3D Ground\n                truth bboxes of each image,\n                shape (num_gt, bbox_code_size).\n            gt_labels_3d (list[Tensor]): 3D Ground truth labels of each\n                
box, shape (num_gt,).\n            centers2d (list[Tensor]): Projected 3D centers onto 2D image,\n                shape (num_gt, 2).\n            feat_shape (tuple[int]): Feature map shape with value,\n                shape (B, _, H, W).\n            img_shape (tuple[int]): Image shape in [h, w] format.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Returns:\n            tuple[Tensor, dict]: The Tensor value is the targets of\n                center heatmap, the dict has components below:\n              - gt_centers2d (Tensor): Coords of each projected 3D box\n                    center on image. shape (B * max_objs, 2)\n              - gt_labels3d (Tensor): Labels of each 3D box.\n                    shape (B, max_objs, )\n              - indices (Tensor): Indices of the existence of the 3D box.\n                    shape (B * max_objs, )\n              - affine_indices (Tensor): Indices of the affine of the 3D box.\n                    shape (N, )\n              - gt_locs (Tensor): Coords of each 3D box's location.\n                    shape (N, 3)\n              - gt_dims (Tensor): Dimensions of each 3D box.\n                    shape (N, 3)\n              - gt_yaws (Tensor): Orientation(yaw) of each 3D box.\n                    shape (N, 1)\n              - gt_cors (Tensor): Coords of the corners of each 3D box.\n                    shape (N, 8, 3)\n        \"\"\"\n\n        reg_mask = torch.stack([\n            gt_bboxes[0].new_tensor(\n                not img_meta['affine_aug'], dtype=torch.bool)\n            for img_meta in img_metas\n        ])\n\n        img_h, img_w = img_shape[:2]\n        bs, _, feat_h, feat_w = feat_shape\n\n        width_ratio = float(feat_w / img_w)  # 1/4\n        height_ratio = float(feat_h / img_h)  # 1/4\n\n        assert width_ratio == height_ratio\n\n        center_heatmap_target = gt_bboxes[-1].new_zeros(\n            [bs, self.num_classes, feat_h, feat_w])\n\n        gt_centers2d = centers2d.copy()\n\n        for batch_id in range(bs):\n            gt_bbox = gt_bboxes[batch_id]\n            gt_label = gt_labels[batch_id]\n            # project centers2d from input image to feat map\n            gt_center2d = gt_centers2d[batch_id] * width_ratio\n\n            for j, center in enumerate(gt_center2d):\n                center_x_int, center_y_int = center.int()\n                scale_box_h = (gt_bbox[j][3] - gt_bbox[j][1]) * height_ratio\n                scale_box_w = (gt_bbox[j][2] - gt_bbox[j][0]) * width_ratio\n                radius = gaussian_radius([scale_box_h, scale_box_w],\n                                         min_overlap=0.7)\n                radius = max(0, int(radius))\n                ind = gt_label[j]\n                gen_gaussian_target(center_heatmap_target[batch_id, ind],\n                                    [center_x_int, center_y_int], radius)\n\n        avg_factor = max(1, center_heatmap_target.eq(1).sum())\n        num_ctrs = [center2d.shape[0] for center2d in centers2d]\n        max_objs = max(num_ctrs)\n\n        reg_inds = torch.cat(\n            [reg_mask[i].repeat(num_ctrs[i]) for i in range(bs)])\n\n        inds = torch.zeros((bs, max_objs),\n                           dtype=torch.bool).to(centers2d[0].device)\n\n        # put gt 3d bboxes to gpu\n        gt_bboxes_3d = [\n            gt_bbox_3d.to(centers2d[0].device) for gt_bbox_3d in gt_bboxes_3d\n        ]\n\n        batch_centers2d = centers2d[0].new_zeros((bs, max_objs, 2))\n      
  batch_labels_3d = gt_labels_3d[0].new_zeros((bs, max_objs))\n        batch_gt_locations = \\\n            gt_bboxes_3d[0].tensor.new_zeros((bs, max_objs, 3))\n        for i in range(bs):\n            inds[i, :num_ctrs[i]] = 1\n            batch_centers2d[i, :num_ctrs[i]] = centers2d[i]\n            batch_labels_3d[i, :num_ctrs[i]] = gt_labels_3d[i]\n            batch_gt_locations[i, :num_ctrs[i]] = \\\n                gt_bboxes_3d[i].tensor[:, :3]\n\n        inds = inds.flatten()\n        batch_centers2d = batch_centers2d.view(-1, 2) * width_ratio\n        batch_gt_locations = batch_gt_locations.view(-1, 3)\n\n        # filter the empty image, without gt_bboxes_3d\n        gt_bboxes_3d = [\n            gt_bbox_3d for gt_bbox_3d in gt_bboxes_3d\n            if gt_bbox_3d.tensor.shape[0] > 0\n        ]\n\n        gt_dimensions = torch.cat(\n            [gt_bbox_3d.tensor[:, 3:6] for gt_bbox_3d in gt_bboxes_3d])\n        gt_orientations = torch.cat([\n            gt_bbox_3d.tensor[:, 6].unsqueeze(-1)\n            for gt_bbox_3d in gt_bboxes_3d\n        ])\n        gt_corners = torch.cat(\n            [gt_bbox_3d.corners for gt_bbox_3d in gt_bboxes_3d])\n\n        target_labels = dict(\n            gt_centers2d=batch_centers2d.long(),\n            gt_labels3d=batch_labels_3d,\n            indices=inds,\n            reg_indices=reg_inds,\n            gt_locs=batch_gt_locations,\n            gt_dims=gt_dimensions,\n            gt_yaws=gt_orientations,\n            gt_cors=gt_corners)\n\n        return center_heatmap_target, avg_factor, target_labels\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             centers2d,\n             depths,\n             attr_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level.\n                shape (num_gt, 4).\n            bbox_preds (list[Tensor]): Box dims is a 4D-tensor, the channel\n                number is bbox_code_size.\n                shape (B, 7, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image.\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box.\n                shape (num_gts, ).\n            gt_bboxes_3d (list[:obj:`CameraInstance3DBoxes`]): 3D boxes ground\n                truth. 
it is the flipped gt_bboxes\n            gt_labels_3d (list[Tensor]): Same as gt_labels.\n            centers2d (list[Tensor]): 2D centers on the image.\n                shape (num_gts, 2).\n            depths (list[Tensor]): Depth ground truth.\n                shape (num_gts, ).\n            attr_labels (list[Tensor]): Attributes indices of each box.\n                In kitti it's None.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n                Default: None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == 1\n        assert attr_labels is None\n        assert gt_bboxes_ignore is None\n        center2d_heatmap = cls_scores[0]\n        pred_reg = bbox_preds[0]\n\n        center2d_heatmap_target, avg_factor, target_labels = \\\n            self.get_targets(gt_bboxes, gt_labels, gt_bboxes_3d,\n                             gt_labels_3d, centers2d,\n                             center2d_heatmap.shape,\n                             img_metas[0]['pad_shape'],\n                             img_metas)\n\n        pred_bboxes = self.get_predictions(\n            labels3d=target_labels['gt_labels3d'],\n            centers2d=target_labels['gt_centers2d'],\n            gt_locations=target_labels['gt_locs'],\n            gt_dimensions=target_labels['gt_dims'],\n            gt_orientations=target_labels['gt_yaws'],\n            indices=target_labels['indices'],\n            img_metas=img_metas,\n            pred_reg=pred_reg)\n\n        loss_cls = self.loss_cls(\n            center2d_heatmap, center2d_heatmap_target, avg_factor=avg_factor)\n\n        reg_inds = target_labels['reg_indices']\n\n        loss_bbox_oris = self.loss_bbox(\n            pred_bboxes['ori'].corners[reg_inds, ...],\n            target_labels['gt_cors'][reg_inds, ...])\n\n        loss_bbox_dims = self.loss_bbox(\n            pred_bboxes['dim'].corners[reg_inds, ...],\n            target_labels['gt_cors'][reg_inds, ...])\n\n        loss_bbox_locs = self.loss_bbox(\n            pred_bboxes['loc'].corners[reg_inds, ...],\n            target_labels['gt_cors'][reg_inds, ...])\n\n        loss_bbox = loss_bbox_dims + loss_bbox_locs + loss_bbox_oris\n\n        loss_dict = dict(loss_cls=loss_cls, loss_bbox=loss_bbox)\n\n        return loss_dict\n"
  },
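  {
    "path": "examples/sketch_smoke_reg_channel_activations.py",
    "content": "\"\"\"Standalone illustrative sketch: the file name, the function and the\nchannel indices used below are hypothetical and are not referenced anywhere\nin this repository. It reproduces, on a dummy regression map, the two\nchannel-wise transforms that ``SMOKEMono3DHead.forward_single`` applies to\nits regression output: dimension-offset channels are squashed to (-0.5, 0.5)\nvia ``sigmoid() - 0.5`` and the two orientation channels are L2-normalized\ninto a unit 2D orientation vector.\n\"\"\"\nimport torch\nfrom torch.nn import functional as F\n\n\ndef postprocess_reg_map(bbox_pred, dim_channel, ori_channel):\n    \"\"\"Apply SMOKE-style activations to selected regression channels.\"\"\"\n    bbox_pred = bbox_pred.clone()\n    # dimension offsets are bounded to (-0.5, 0.5)\n    bbox_pred[:, dim_channel, ...] = bbox_pred[:, dim_channel, ...].sigmoid() - 0.5\n    # the orientation vector is normalized to unit length over its two channels\n    bbox_pred[:, ori_channel, ...] = F.normalize(bbox_pred[:, ori_channel, ...])\n    return bbox_pred\n\n\nif __name__ == '__main__':\n    # 8 regression channels with example dim/ori channel indices\n    reg = torch.randn(2, 8, 4, 4)\n    out = postprocess_reg_map(reg, dim_channel=[3, 4, 5], ori_channel=[6, 7])\n    print(out.shape)\n"
  },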
  {
    "path": "mmdet3d/models/dense_heads/ssd_3d_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops.nms import batched_nms\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox.structures import (DepthInstance3DBoxes,\n                                          LiDARInstance3DBoxes,\n                                          rotation_3d_in_axis)\nfrom mmdet.core import multi_apply\nfrom ..builder import HEADS, build_loss\nfrom .vote_head import VoteHead\n\n\n@HEADS.register_module()\nclass SSD3DHead(VoteHead):\n    r\"\"\"Bbox head of `3DSSD <https://arxiv.org/abs/2002.10187>`_.\n\n    Args:\n        num_classes (int): The number of class.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        in_channels (int): The number of input feature channel.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        vote_module_cfg (dict): Config of VoteModule for point-wise votes.\n        vote_aggregation_cfg (dict): Config of vote aggregation layer.\n        pred_layer_cfg (dict): Config of classfication and regression\n            prediction layers.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        act_cfg (dict): Config of activation in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        corner_loss (dict): Config of bbox corners regression loss.\n        vote_loss (dict): Config of candidate points regression loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 bbox_coder,\n                 in_channels=256,\n                 train_cfg=None,\n                 test_cfg=None,\n                 vote_module_cfg=None,\n                 vote_aggregation_cfg=None,\n                 pred_layer_cfg=None,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_res_loss=None,\n                 corner_loss=None,\n                 vote_loss=None,\n                 init_cfg=None):\n        super(SSD3DHead, self).__init__(\n            num_classes,\n            bbox_coder,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            vote_module_cfg=vote_module_cfg,\n            vote_aggregation_cfg=vote_aggregation_cfg,\n            pred_layer_cfg=pred_layer_cfg,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            objectness_loss=objectness_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_class_loss=None,\n            size_res_loss=size_res_loss,\n            semantic_loss=None,\n            init_cfg=init_cfg)\n\n        self.corner_loss = build_loss(corner_loss)\n        self.vote_loss = build_loss(vote_loss)\n        self.num_candidates = vote_module_cfg['num_points']\n\n    def _get_cls_out_channels(self):\n        \"\"\"Return the channel number of 
classification outputs.\"\"\"\n        # Class numbers (k) + objectness (1)\n        return self.num_classes\n\n    def _get_reg_out_channels(self):\n        \"\"\"Return the channel number of regression outputs.\"\"\"\n        # Bbox classification and regression\n        # (center residual (3), size regression (3)\n        # heading class+residual (num_dir_bins*2)),\n        return 3 + 3 + self.num_dir_bins * 2\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n            torch.Tensor: Indices of input points.\n        \"\"\"\n        seed_points = feat_dict['sa_xyz'][-1]\n        seed_features = feat_dict['sa_features'][-1]\n        seed_indices = feat_dict['sa_indices'][-1]\n\n        return seed_points, seed_features, seed_indices\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of SSD3DHead.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses of 3DSSD.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n        (vote_targets, center_targets, size_res_targets, dir_class_targets,\n         dir_res_targets, mask_targets, centerness_targets, corner3d_targets,\n         vote_mask, positive_mask, negative_mask, centerness_weights,\n         box_loss_weights, heading_res_loss_weight) = targets\n\n        # calculate centerness loss\n        centerness_loss = self.objectness_loss(\n            bbox_preds['obj_scores'].transpose(2, 1),\n            centerness_targets,\n            weight=centerness_weights)\n\n        # calculate center loss\n        center_loss = self.center_loss(\n            bbox_preds['center_offset'],\n            center_targets,\n            weight=box_loss_weights.unsqueeze(-1))\n\n        # calculate direction class loss\n        dir_class_loss = self.dir_class_loss(\n            bbox_preds['dir_class'].transpose(1, 2),\n            dir_class_targets,\n            weight=box_loss_weights)\n\n        # calculate direction residual loss\n        dir_res_loss = self.dir_res_loss(\n            bbox_preds['dir_res_norm'],\n            dir_res_targets.unsqueeze(-1).repeat(1, 1, self.num_dir_bins),\n            weight=heading_res_loss_weight)\n\n        # calculate size residual loss\n        size_loss = self.size_res_loss(\n   
         bbox_preds['size'],\n            size_res_targets,\n            weight=box_loss_weights.unsqueeze(-1))\n\n        # calculate corner loss\n        one_hot_dir_class_targets = dir_class_targets.new_zeros(\n            bbox_preds['dir_class'].shape)\n        one_hot_dir_class_targets.scatter_(2, dir_class_targets.unsqueeze(-1),\n                                           1)\n        pred_bbox3d = self.bbox_coder.decode(\n            dict(\n                center=bbox_preds['center'],\n                dir_res=bbox_preds['dir_res'],\n                dir_class=one_hot_dir_class_targets,\n                size=bbox_preds['size']))\n        pred_bbox3d = pred_bbox3d.reshape(-1, pred_bbox3d.shape[-1])\n        pred_bbox3d = img_metas[0]['box_type_3d'](\n            pred_bbox3d.clone(),\n            box_dim=pred_bbox3d.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        pred_corners3d = pred_bbox3d.corners.reshape(-1, 8, 3)\n        corner_loss = self.corner_loss(\n            pred_corners3d,\n            corner3d_targets.reshape(-1, 8, 3),\n            weight=box_loss_weights.view(-1, 1, 1))\n\n        # calculate vote loss\n        vote_loss = self.vote_loss(\n            bbox_preds['vote_offset'].transpose(1, 2),\n            vote_targets,\n            weight=vote_mask.unsqueeze(-1))\n\n        losses = dict(\n            centerness_loss=centerness_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_res_loss=size_loss,\n            corner_loss=corner_loss,\n            vote_loss=vote_loss)\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of ssd3d head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of ssd3d head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of ssd3d head.\n        \"\"\"\n        # find empty example\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        aggregated_points = [\n            bbox_preds['aggregated_points'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        seed_points = [\n            bbox_preds['seed_points'][i, :self.num_candidates].detach()\n            for i in range(len(gt_labels_3d))\n        ]\n\n        
(vote_targets, center_targets, size_res_targets, dir_class_targets,\n         dir_res_targets, mask_targets, centerness_targets, corner3d_targets,\n         vote_mask, positive_mask, negative_mask) = multi_apply(\n             self.get_targets_single, points, gt_bboxes_3d, gt_labels_3d,\n             pts_semantic_mask, pts_instance_mask, aggregated_points,\n             seed_points)\n\n        center_targets = torch.stack(center_targets)\n        positive_mask = torch.stack(positive_mask)\n        negative_mask = torch.stack(negative_mask)\n        dir_class_targets = torch.stack(dir_class_targets)\n        dir_res_targets = torch.stack(dir_res_targets)\n        size_res_targets = torch.stack(size_res_targets)\n        mask_targets = torch.stack(mask_targets)\n        centerness_targets = torch.stack(centerness_targets).detach()\n        corner3d_targets = torch.stack(corner3d_targets)\n        vote_targets = torch.stack(vote_targets)\n        vote_mask = torch.stack(vote_mask)\n\n        center_targets -= bbox_preds['aggregated_points']\n\n        centerness_weights = (positive_mask +\n                              negative_mask).unsqueeze(-1).repeat(\n                                  1, 1, self.num_classes).float()\n        centerness_weights = centerness_weights / \\\n            (centerness_weights.sum() + 1e-6)\n        vote_mask = vote_mask / (vote_mask.sum() + 1e-6)\n\n        box_loss_weights = positive_mask / (positive_mask.sum() + 1e-6)\n\n        batch_size, proposal_num = dir_class_targets.shape[:2]\n        heading_label_one_hot = dir_class_targets.new_zeros(\n            (batch_size, proposal_num, self.num_dir_bins))\n        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)\n        heading_res_loss_weight = heading_label_one_hot * \\\n            box_loss_weights.unsqueeze(-1)\n\n        return (vote_targets, center_targets, size_res_targets,\n                dir_class_targets, dir_res_targets, mask_targets,\n                centerness_targets, corner3d_targets, vote_mask, positive_mask,\n                negative_mask, centerness_weights, box_loss_weights,\n                heading_res_loss_weight)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           aggregated_points=None,\n                           seed_points=None):\n        \"\"\"Generate targets of ssd3d head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (torch.Tensor): Point-wise instance\n                label of each batch.\n            aggregated_points (torch.Tensor): Aggregated points from\n                candidate points layer.\n            seed_points (torch.Tensor): Seed points of candidate points.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of ssd3d head.\n        \"\"\"\n        assert self.bbox_coder.with_rot or pts_semantic_mask is not None\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n        valid_gt = gt_labels_3d != -1\n        gt_bboxes_3d = gt_bboxes_3d[valid_gt]\n        
gt_labels_3d = gt_labels_3d[valid_gt]\n\n        # Generate fake GT for empty scene\n        if valid_gt.sum() == 0:\n            vote_targets = points.new_zeros(self.num_candidates, 3)\n            center_targets = points.new_zeros(self.num_candidates, 3)\n            size_res_targets = points.new_zeros(self.num_candidates, 3)\n            dir_class_targets = points.new_zeros(\n                self.num_candidates, dtype=torch.int64)\n            dir_res_targets = points.new_zeros(self.num_candidates)\n            mask_targets = points.new_zeros(\n                self.num_candidates, dtype=torch.int64)\n            centerness_targets = points.new_zeros(self.num_candidates,\n                                                  self.num_classes)\n            corner3d_targets = points.new_zeros(self.num_candidates, 8, 3)\n            vote_mask = points.new_zeros(self.num_candidates, dtype=torch.bool)\n            positive_mask = points.new_zeros(\n                self.num_candidates, dtype=torch.bool)\n            negative_mask = points.new_ones(\n                self.num_candidates, dtype=torch.bool)\n            return (vote_targets, center_targets, size_res_targets,\n                    dir_class_targets, dir_res_targets, mask_targets,\n                    centerness_targets, corner3d_targets, vote_mask,\n                    positive_mask, negative_mask)\n\n        gt_corner3d = gt_bboxes_3d.corners\n\n        (center_targets, size_targets, dir_class_targets,\n         dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)\n\n        points_mask, assignment = self._assign_targets_by_points_inside(\n            gt_bboxes_3d, aggregated_points)\n\n        center_targets = center_targets[assignment]\n        size_res_targets = size_targets[assignment]\n        mask_targets = gt_labels_3d[assignment]\n        dir_class_targets = dir_class_targets[assignment]\n        dir_res_targets = dir_res_targets[assignment]\n        corner3d_targets = gt_corner3d[assignment]\n\n        top_center_targets = center_targets.clone()\n        top_center_targets[:, 2] += size_res_targets[:, 2]\n        dist = torch.norm(aggregated_points - top_center_targets, dim=1)\n        dist_mask = dist < self.train_cfg.pos_distance_thr\n        positive_mask = (points_mask.max(1)[0] > 0) * dist_mask\n        negative_mask = (points_mask.max(1)[0] == 0)\n\n        # Centerness loss targets\n        canonical_xyz = aggregated_points - center_targets\n        if self.bbox_coder.with_rot:\n            # TODO: Align points rotation implementation of\n            # LiDARInstance3DBoxes and DepthInstance3DBoxes\n            canonical_xyz = rotation_3d_in_axis(\n                canonical_xyz.unsqueeze(0).transpose(0, 1),\n                -gt_bboxes_3d.yaw[assignment],\n                axis=2).squeeze(1)\n        distance_front = torch.clamp(\n            size_res_targets[:, 0] - canonical_xyz[:, 0], min=0)\n        distance_back = torch.clamp(\n            size_res_targets[:, 0] + canonical_xyz[:, 0], min=0)\n        distance_left = torch.clamp(\n            size_res_targets[:, 1] - canonical_xyz[:, 1], min=0)\n        distance_right = torch.clamp(\n            size_res_targets[:, 1] + canonical_xyz[:, 1], min=0)\n        distance_top = torch.clamp(\n            size_res_targets[:, 2] - canonical_xyz[:, 2], min=0)\n        distance_bottom = torch.clamp(\n            size_res_targets[:, 2] + canonical_xyz[:, 2], min=0)\n\n        centerness_l = torch.min(distance_front, distance_back) / torch.max(\n            
distance_front, distance_back)\n        centerness_w = torch.min(distance_left, distance_right) / torch.max(\n            distance_left, distance_right)\n        centerness_h = torch.min(distance_bottom, distance_top) / torch.max(\n            distance_bottom, distance_top)\n        centerness_targets = torch.clamp(\n            centerness_l * centerness_w * centerness_h, min=0)\n        centerness_targets = centerness_targets.pow(1 / 3.0)\n        centerness_targets = torch.clamp(centerness_targets, min=0, max=1)\n\n        proposal_num = centerness_targets.shape[0]\n        one_hot_centerness_targets = centerness_targets.new_zeros(\n            (proposal_num, self.num_classes))\n        one_hot_centerness_targets.scatter_(1, mask_targets.unsqueeze(-1), 1)\n        centerness_targets = centerness_targets.unsqueeze(\n            1) * one_hot_centerness_targets\n\n        # Vote loss targets\n        enlarged_gt_bboxes_3d = gt_bboxes_3d.enlarged_box(\n            self.train_cfg.expand_dims_length)\n        enlarged_gt_bboxes_3d.tensor[:, 2] -= self.train_cfg.expand_dims_length\n        vote_mask, vote_assignment = self._assign_targets_by_points_inside(\n            enlarged_gt_bboxes_3d, seed_points)\n\n        vote_targets = gt_bboxes_3d.gravity_center\n        vote_targets = vote_targets[vote_assignment] - seed_points\n        vote_mask = vote_mask.max(1)[0] > 0\n\n        return (vote_targets, center_targets, size_res_targets,\n                dir_class_targets, dir_res_targets, mask_targets,\n                centerness_targets, corner3d_targets, vote_mask, positive_mask,\n                negative_mask)\n\n    def get_bboxes(self, points, bbox_preds, input_metas, rescale=False):\n        \"\"\"Generate bboxes from 3DSSD head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from sdd3d head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # decode boxes\n        sem_scores = F.sigmoid(bbox_preds['obj_scores']).transpose(1, 2)\n        obj_scores = sem_scores.max(-1)[0]\n        bbox3d = self.bbox_coder.decode(bbox_preds)\n\n        batch_size = bbox3d.shape[0]\n        results = list()\n\n        for b in range(batch_size):\n            bbox_selected, score_selected, labels = self.multiclass_nms_single(\n                obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],\n                input_metas[b])\n\n            bbox = input_metas[b]['box_type_3d'](\n                bbox_selected.clone(),\n                box_dim=bbox_selected.shape[-1],\n                with_yaw=self.bbox_coder.with_rot)\n            results.append((bbox, score_selected, labels))\n\n        return results\n\n    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): Semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n            points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        bbox = 
input_meta['box_type_3d'](\n            bbox.clone(),\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n\n        if isinstance(bbox, (LiDARInstance3DBoxes, DepthInstance3DBoxes)):\n            box_indices = bbox.points_in_boxes_all(points)\n            nonempty_box_mask = box_indices.T.sum(1) >= 0\n        else:\n            raise NotImplementedError('Unsupported bbox type!')\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_keep = batched_nms(\n            minmax_box3d[nonempty_box_mask][:, [0, 1, 3, 4]],\n            obj_scores[nonempty_box_mask], bbox_classes[nonempty_box_mask],\n            self.test_cfg.nms_cfg)[1]\n\n        if nms_keep.shape[0] > self.test_cfg.max_output_num:\n            nms_keep = nms_keep[:self.test_cfg.max_output_num]\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores >= self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_keep], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n            for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n\n    def _assign_targets_by_points_inside(self, bboxes_3d, points):\n        \"\"\"Compute assignment by checking whether point is inside bbox.\n\n        Args:\n            bboxes_3d (BaseInstance3DBoxes): Instance of bounding boxes.\n            points (torch.Tensor): Points of a batch.\n\n        Returns:\n            tuple[torch.Tensor]: Flags indicating whether each point is\n                inside bbox and the index of box where each point are in.\n        \"\"\"\n        if isinstance(bboxes_3d, (LiDARInstance3DBoxes, DepthInstance3DBoxes)):\n            points_mask = bboxes_3d.points_in_boxes_all(points)\n            assignment = points_mask.argmax(dim=-1)\n        else:\n            raise NotImplementedError('Unsupported bbox type!')\n\n        return points_mask, assignment\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/train_mixins.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core import limit_period\nfrom mmdet.core import images_to_levels, multi_apply\n\n\nclass AnchorTrainMixin(object):\n    \"\"\"Mixin class for target assigning of dense heads.\"\"\"\n\n    def anchor_target_3d(self,\n                         anchor_list,\n                         gt_bboxes_list,\n                         input_metas,\n                         gt_bboxes_ignore_list=None,\n                         gt_labels_list=None,\n                         label_channels=1,\n                         num_classes=1,\n                         sampling=True):\n        \"\"\"Compute regression and classification targets for anchors.\n\n        Args:\n            anchor_list (list[list]): Multi level anchors of each image.\n            gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each image.\n            input_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list): Ignore list of gt bboxes.\n            gt_labels_list (list[torch.Tensor]): Gt labels of batches.\n            label_channels (int): The channel of labels.\n            num_classes (int): The number of classes.\n            sampling (bool): Whether to sample anchors.\n\n        Returns:\n            tuple (list, list, list, list, list, list, int, int):\n                Anchor targets, including labels, label weights,\n                bbox targets, bbox weights, direction targets,\n                direction weights, number of positive anchors and\n                number of negative anchors.\n        \"\"\"\n        num_imgs = len(input_metas)\n        assert len(anchor_list) == num_imgs\n\n        if isinstance(anchor_list[0][0], list):\n            # sizes of anchors are different\n            # anchor number of a single level\n            num_level_anchors = [\n                sum([anchor.size(0) for anchor in anchors])\n                for anchors in anchor_list[0]\n            ]\n            for i in range(num_imgs):\n                anchor_list[i] = anchor_list[i][0]\n        else:\n            # anchor number of multi levels\n            num_level_anchors = [\n                anchors.view(-1, self.box_code_size).size(0)\n                for anchors in anchor_list[0]\n            ]\n            # concat all level anchors and flags to a single tensor\n            for i in range(num_imgs):\n                anchor_list[i] = torch.cat(anchor_list[i])\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n\n        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,\n         all_dir_targets, all_dir_weights, pos_inds_list,\n         neg_inds_list) = multi_apply(\n             self.anchor_target_3d_single,\n             anchor_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             input_metas,\n             label_channels=label_channels,\n             num_classes=num_classes,\n             sampling=sampling)\n\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = 
sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. multiple levels\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors)\n        dir_targets_list = images_to_levels(all_dir_targets, num_level_anchors)\n        dir_weights_list = images_to_levels(all_dir_weights, num_level_anchors)\n        return (labels_list, label_weights_list, bbox_targets_list,\n                bbox_weights_list, dir_targets_list, dir_weights_list,\n                num_total_pos, num_total_neg)\n\n    def anchor_target_3d_single(self,\n                                anchors,\n                                gt_bboxes,\n                                gt_bboxes_ignore,\n                                gt_labels,\n                                input_meta,\n                                label_channels=1,\n                                num_classes=1,\n                                sampling=True):\n        \"\"\"Compute targets of anchors in single batch.\n\n        Args:\n            anchors (torch.Tensor): Concatenated multi-level anchor.\n            gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.\n            gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.\n            gt_labels (torch.Tensor): Gt class labels.\n            input_meta (dict): Meta info of each image.\n            label_channels (int): The channel of labels.\n            num_classes (int): The number of classes.\n            sampling (bool): Whether to sample anchors.\n\n        Returns:\n            tuple[torch.Tensor]: Anchor targets.\n        \"\"\"\n        if isinstance(self.bbox_assigner,\n                      list) and (not isinstance(anchors, list)):\n            feat_size = anchors.size(0) * anchors.size(1) * anchors.size(2)\n            rot_angles = anchors.size(-2)\n            assert len(self.bbox_assigner) == anchors.size(-3)\n            (total_labels, total_label_weights, total_bbox_targets,\n             total_bbox_weights, total_dir_targets, total_dir_weights,\n             total_pos_inds, total_neg_inds) = [], [], [], [], [], [], [], []\n            current_anchor_num = 0\n            for i, assigner in enumerate(self.bbox_assigner):\n                current_anchors = anchors[..., i, :, :].reshape(\n                    -1, self.box_code_size)\n                current_anchor_num += current_anchors.size(0)\n                if self.assign_per_class:\n                    gt_per_cls = (gt_labels == i)\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes[gt_per_cls, :],\n                        gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,\n                        num_classes, sampling)\n                else:\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,\n                        gt_labels, input_meta, num_classes, sampling)\n\n                (labels, label_weights, bbox_targets, bbox_weights,\n                 dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets\n             
   total_labels.append(labels.reshape(feat_size, 1, rot_angles))\n                total_label_weights.append(\n                    label_weights.reshape(feat_size, 1, rot_angles))\n                total_bbox_targets.append(\n                    bbox_targets.reshape(feat_size, 1, rot_angles,\n                                         anchors.size(-1)))\n                total_bbox_weights.append(\n                    bbox_weights.reshape(feat_size, 1, rot_angles,\n                                         anchors.size(-1)))\n                total_dir_targets.append(\n                    dir_targets.reshape(feat_size, 1, rot_angles))\n                total_dir_weights.append(\n                    dir_weights.reshape(feat_size, 1, rot_angles))\n                total_pos_inds.append(pos_inds)\n                total_neg_inds.append(neg_inds)\n\n            total_labels = torch.cat(total_labels, dim=-2).reshape(-1)\n            total_label_weights = torch.cat(\n                total_label_weights, dim=-2).reshape(-1)\n            total_bbox_targets = torch.cat(\n                total_bbox_targets, dim=-3).reshape(-1, anchors.size(-1))\n            total_bbox_weights = torch.cat(\n                total_bbox_weights, dim=-3).reshape(-1, anchors.size(-1))\n            total_dir_targets = torch.cat(\n                total_dir_targets, dim=-2).reshape(-1)\n            total_dir_weights = torch.cat(\n                total_dir_weights, dim=-2).reshape(-1)\n            total_pos_inds = torch.cat(total_pos_inds, dim=0).reshape(-1)\n            total_neg_inds = torch.cat(total_neg_inds, dim=0).reshape(-1)\n            return (total_labels, total_label_weights, total_bbox_targets,\n                    total_bbox_weights, total_dir_targets, total_dir_weights,\n                    total_pos_inds, total_neg_inds)\n        elif isinstance(self.bbox_assigner, list) and isinstance(\n                anchors, list):\n            # class-aware anchors with different feature map sizes\n            assert len(self.bbox_assigner) == len(anchors), \\\n                'The number of bbox assigners and anchors should be the same.'\n            (total_labels, total_label_weights, total_bbox_targets,\n             total_bbox_weights, total_dir_targets, total_dir_weights,\n             total_pos_inds, total_neg_inds) = [], [], [], [], [], [], [], []\n            current_anchor_num = 0\n            for i, assigner in enumerate(self.bbox_assigner):\n                current_anchors = anchors[i]\n                current_anchor_num += current_anchors.size(0)\n                if self.assign_per_class:\n                    gt_per_cls = (gt_labels == i)\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes[gt_per_cls, :],\n                        gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,\n                        num_classes, sampling)\n                else:\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,\n                        gt_labels, input_meta, num_classes, sampling)\n\n                (labels, label_weights, bbox_targets, bbox_weights,\n                 dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets\n                total_labels.append(labels)\n                total_label_weights.append(label_weights)\n                total_bbox_targets.append(\n                    bbox_targets.reshape(-1, anchors[i].size(-1)))\n   
             total_bbox_weights.append(\n                    bbox_weights.reshape(-1, anchors[i].size(-1)))\n                total_dir_targets.append(dir_targets)\n                total_dir_weights.append(dir_weights)\n                total_pos_inds.append(pos_inds)\n                total_neg_inds.append(neg_inds)\n\n            total_labels = torch.cat(total_labels, dim=0)\n            total_label_weights = torch.cat(total_label_weights, dim=0)\n            total_bbox_targets = torch.cat(total_bbox_targets, dim=0)\n            total_bbox_weights = torch.cat(total_bbox_weights, dim=0)\n            total_dir_targets = torch.cat(total_dir_targets, dim=0)\n            total_dir_weights = torch.cat(total_dir_weights, dim=0)\n            total_pos_inds = torch.cat(total_pos_inds, dim=0)\n            total_neg_inds = torch.cat(total_neg_inds, dim=0)\n            return (total_labels, total_label_weights, total_bbox_targets,\n                    total_bbox_weights, total_dir_targets, total_dir_weights,\n                    total_pos_inds, total_neg_inds)\n        else:\n            return self.anchor_target_single_assigner(self.bbox_assigner,\n                                                      anchors, gt_bboxes,\n                                                      gt_bboxes_ignore,\n                                                      gt_labels, input_meta,\n                                                      num_classes, sampling)\n\n    def anchor_target_single_assigner(self,\n                                      bbox_assigner,\n                                      anchors,\n                                      gt_bboxes,\n                                      gt_bboxes_ignore,\n                                      gt_labels,\n                                      input_meta,\n                                      num_classes=1,\n                                      sampling=True):\n        \"\"\"Assign anchors and encode positive anchors.\n\n        Args:\n            bbox_assigner (BaseAssigner): assign positive and negative boxes.\n            anchors (torch.Tensor): Concatenated multi-level anchor.\n            gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.\n            gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.\n            gt_labels (torch.Tensor): Gt class labels.\n            input_meta (dict): Meta info of each image.\n            num_classes (int): The number of classes.\n            sampling (bool): Whether to sample anchors.\n\n        Returns:\n            tuple[torch.Tensor]: Anchor targets.\n        \"\"\"\n        anchors = anchors.reshape(-1, anchors.size(-1))\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        dir_targets = anchors.new_zeros((anchors.shape[0]), dtype=torch.long)\n        dir_weights = anchors.new_zeros((anchors.shape[0]), dtype=torch.float)\n        labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n        if len(gt_bboxes) > 0:\n            if not isinstance(gt_bboxes, torch.Tensor):\n                gt_bboxes = gt_bboxes.tensor.to(anchors.device)\n            assign_result = bbox_assigner.assign(anchors, gt_bboxes,\n                                                 gt_bboxes_ignore, gt_labels)\n            sampling_result = self.bbox_sampler.sample(assign_result, anchors,\n                                                       
gt_bboxes)\n            pos_inds = sampling_result.pos_inds\n            neg_inds = sampling_result.neg_inds\n        else:\n            pos_inds = torch.nonzero(\n                anchors.new_zeros((anchors.shape[0], ), dtype=torch.bool) > 0,\n                as_tuple=False).squeeze(-1).unique()\n            neg_inds = torch.nonzero(\n                anchors.new_zeros((anchors.shape[0], ), dtype=torch.bool) == 0,\n                as_tuple=False).squeeze(-1).unique()\n\n        if gt_labels is not None:\n            labels += num_classes\n        if len(pos_inds) > 0:\n            pos_bbox_targets = self.bbox_coder.encode(\n                sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n            pos_dir_targets = get_direction_target(\n                sampling_result.pos_bboxes,\n                pos_bbox_targets,\n                self.dir_offset,\n                self.dir_limit_offset,\n                one_hot=False)\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            dir_targets[pos_inds] = pos_dir_targets\n            dir_weights[pos_inds] = 1.0\n\n            if gt_labels is None:\n                labels[pos_inds] = 1\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n        return (labels, label_weights, bbox_targets, bbox_weights, dir_targets,\n                dir_weights, pos_inds, neg_inds)\n\n\ndef get_direction_target(anchors,\n                         reg_targets,\n                         dir_offset=0,\n                         dir_limit_offset=0,\n                         num_bins=2,\n                         one_hot=True):\n    \"\"\"Encode direction to 0 ~ num_bins-1.\n\n    Args:\n        anchors (torch.Tensor): Concatenated multi-level anchor.\n        reg_targets (torch.Tensor): Bbox regression targets.\n        dir_offset (int): Direction offset.\n        dir_limit_offset (int): Offset to limit the rotation period.\n        num_bins (int): Number of bins to divide 2*PI.\n        one_hot (bool): Whether to encode as one hot.\n\n    Returns:\n        torch.Tensor: Encoded direction targets.\n    \"\"\"\n    rot_gt = reg_targets[..., 6] + anchors[..., 6]\n    offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset, 2 * np.pi)\n    dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()\n    dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)\n    if one_hot:\n        dir_targets = torch.zeros(\n            *list(dir_cls_targets.shape),\n            num_bins,\n            dtype=anchors.dtype,\n            device=dir_cls_targets.device)\n        # one-hot encode along the last (num_bins) dimension\n        dir_targets.scatter_(\n            -1, dir_cls_targets.unsqueeze(dim=-1).long(), 1.0)\n        dir_cls_targets = dir_targets\n    return dir_cls_targets\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/vote_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.ops import furthest_point_sample\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.post_processing import aligned_3d_nms\nfrom mmdet3d.models.losses import chamfer_distance\nfrom mmdet3d.models.model_utils import VoteModule\nfrom mmdet3d.ops import build_sa_module\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom ..builder import HEADS, build_loss\nfrom .base_conv_bbox_head import BaseConvBboxHead\n\n\n@HEADS.register_module()\nclass VoteHead(BaseModule):\n    r\"\"\"Bbox head of `Votenet <https://arxiv.org/abs/1904.09664>`_.\n\n    Args:\n        num_classes (int): The number of class.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        vote_module_cfg (dict): Config of VoteModule for point-wise votes.\n        vote_aggregation_cfg (dict): Config of vote aggregation layer.\n        pred_layer_cfg (dict): Config of classfication and regression\n            prediction layers.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_class_loss (dict): Config of size classification loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 bbox_coder,\n                 train_cfg=None,\n                 test_cfg=None,\n                 vote_module_cfg=None,\n                 vote_aggregation_cfg=None,\n                 pred_layer_cfg=None,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_class_loss=None,\n                 size_res_loss=None,\n                 semantic_loss=None,\n                 iou_loss=None,\n                 init_cfg=None):\n        super(VoteHead, self).__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.gt_per_seed = vote_module_cfg['gt_per_seed']\n        self.num_proposal = vote_aggregation_cfg['num_point']\n\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.dir_res_loss = build_loss(dir_res_loss)\n        self.dir_class_loss = build_loss(dir_class_loss)\n        self.size_res_loss = build_loss(size_res_loss)\n        if size_class_loss is not None:\n            self.size_class_loss = build_loss(size_class_loss)\n        if semantic_loss is not None:\n            self.semantic_loss = build_loss(semantic_loss)\n        if iou_loss is not None:\n            self.iou_loss = build_loss(iou_loss)\n        else:\n            self.iou_loss = None\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_sizes = 
self.bbox_coder.num_sizes\n        self.num_dir_bins = self.bbox_coder.num_dir_bins\n\n        self.vote_module = VoteModule(**vote_module_cfg)\n        self.vote_aggregation = build_sa_module(vote_aggregation_cfg)\n        self.fp16_enabled = False\n\n        # Bbox classification and regression\n        self.conv_pred = BaseConvBboxHead(\n            **pred_layer_cfg,\n            num_cls_out_channels=self._get_cls_out_channels(),\n            num_reg_out_channels=self._get_reg_out_channels())\n\n    def _get_cls_out_channels(self):\n        \"\"\"Return the channel number of classification outputs.\"\"\"\n        # Class numbers (k) + objectness (2)\n        return self.num_classes + 2\n\n    def _get_reg_out_channels(self):\n        \"\"\"Return the channel number of regression outputs.\"\"\"\n        # Objectness scores (2), center residual (3),\n        # heading class+residual (num_dir_bins*2),\n        # size class+residual(num_sizes*4)\n        return 3 + self.num_dir_bins * 2 + self.num_sizes * 4\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n            torch.Tensor: Indices of input points.\n        \"\"\"\n\n        # for imvotenet\n        if 'seed_points' in feat_dict and \\\n           'seed_features' in feat_dict and \\\n           'seed_indices' in feat_dict:\n            seed_points = feat_dict['seed_points']\n            seed_features = feat_dict['seed_features']\n            seed_indices = feat_dict['seed_indices']\n        # for votenet\n        else:\n            seed_points = feat_dict['fp_xyz'][-1]\n            seed_features = feat_dict['fp_features'][-1]\n            seed_indices = feat_dict['fp_indices'][-1]\n\n        return seed_points, seed_features, seed_indices\n\n    def forward(self, feat_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Note:\n            The forward of VoteHead is divided into 4 steps:\n\n                1. Generate vote_points from seed_points.\n                2. Aggregate vote_points.\n                3. Predict bbox and score.\n                4. Decode predictions.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n            sample_mod (str): Sample mode for vote aggregation layer.\n                valid modes are \"vote\", \"seed\", \"random\" and \"spec\".\n\n        Returns:\n            dict: Predictions of vote head.\n        \"\"\"\n        assert sample_mod in ['vote', 'seed', 'random', 'spec']\n\n        seed_points, seed_features, seed_indices = self._extract_input(\n            feat_dict)\n\n        # 1. generate vote_points from seed_points\n        vote_points, vote_features, vote_offset = self.vote_module(\n            seed_points, seed_features)\n        results = dict(\n            seed_points=seed_points,\n            seed_indices=seed_indices,\n            vote_points=vote_points,\n            vote_features=vote_features,\n            vote_offset=vote_offset)\n\n        # 2. 
aggregate vote_points\n        if sample_mod == 'vote':\n            # use fps in vote_aggregation\n            aggregation_inputs = dict(\n                points_xyz=vote_points, features=vote_features)\n        elif sample_mod == 'seed':\n            # FPS on seed and choose the votes corresponding to the seeds\n            sample_indices = furthest_point_sample(seed_points,\n                                                   self.num_proposal)\n            aggregation_inputs = dict(\n                points_xyz=vote_points,\n                features=vote_features,\n                indices=sample_indices)\n        elif sample_mod == 'random':\n            # Random sampling from the votes\n            batch_size, num_seed = seed_points.shape[:2]\n            sample_indices = seed_points.new_tensor(\n                torch.randint(0, num_seed, (batch_size, self.num_proposal)),\n                dtype=torch.int32)\n            aggregation_inputs = dict(\n                points_xyz=vote_points,\n                features=vote_features,\n                indices=sample_indices)\n        elif sample_mod == 'spec':\n            # Specify the new center in vote_aggregation\n            aggregation_inputs = dict(\n                points_xyz=seed_points,\n                features=seed_features,\n                target_xyz=vote_points)\n        else:\n            raise NotImplementedError(\n                f'Sample mode {sample_mod} is not supported!')\n\n        vote_aggregation_ret = self.vote_aggregation(**aggregation_inputs)\n        aggregated_points, features, aggregated_indices = vote_aggregation_ret\n\n        results['aggregated_points'] = aggregated_points\n        results['aggregated_features'] = features\n        results['aggregated_indices'] = aggregated_indices\n\n        # 3. predict bbox and score\n        cls_predictions, reg_predictions = self.conv_pred(features)\n\n        # 4. 
decode predictions\n        decode_res = self.bbox_coder.split_pred(cls_predictions,\n                                                reg_predictions,\n                                                aggregated_points)\n\n        results.update(decode_res)\n\n        return results\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None,\n             ret_target=False):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of vote head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding boxes to ignore.\n            ret_target (bool): Return targets or not.\n\n        Returns:\n            dict: Losses of Votenet.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,\n         dir_class_targets, dir_res_targets, center_targets,\n         assigned_center_targets, mask_targets, valid_gt_masks,\n         objectness_targets, objectness_weights, box_loss_weights,\n         valid_gt_weights) = targets\n\n        # calculate vote loss\n        vote_loss = self.vote_module.get_loss(bbox_preds['seed_points'],\n                                              bbox_preds['vote_points'],\n                                              bbox_preds['seed_indices'],\n                                              vote_target_masks, vote_targets)\n\n        # calculate objectness loss\n        objectness_loss = self.objectness_loss(\n            bbox_preds['obj_scores'].transpose(2, 1),\n            objectness_targets,\n            weight=objectness_weights)\n\n        # calculate center loss\n        source2target_loss, target2source_loss = self.center_loss(\n            bbox_preds['center'],\n            center_targets,\n            src_weight=box_loss_weights,\n            dst_weight=valid_gt_weights)\n        center_loss = source2target_loss + target2source_loss\n\n        # calculate direction class loss\n        dir_class_loss = self.dir_class_loss(\n            bbox_preds['dir_class'].transpose(2, 1),\n            dir_class_targets,\n            weight=box_loss_weights)\n\n        # calculate direction residual loss\n        batch_size, proposal_num = size_class_targets.shape[:2]\n        heading_label_one_hot = vote_targets.new_zeros(\n            (batch_size, proposal_num, self.num_dir_bins))\n        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)\n        dir_res_norm = torch.sum(\n            bbox_preds['dir_res_norm'] * heading_label_one_hot, -1)\n        dir_res_loss = self.dir_res_loss(\n            dir_res_norm, 
dir_res_targets, weight=box_loss_weights)\n\n        # calculate size class loss\n        size_class_loss = self.size_class_loss(\n            bbox_preds['size_class'].transpose(2, 1),\n            size_class_targets,\n            weight=box_loss_weights)\n\n        # calculate size residual loss\n        one_hot_size_targets = vote_targets.new_zeros(\n            (batch_size, proposal_num, self.num_sizes))\n        one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(\n            -1).repeat(1, 1, 1, 3).contiguous()\n        size_residual_norm = torch.sum(\n            bbox_preds['size_res_norm'] * one_hot_size_targets_expand, 2)\n        box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(\n            1, 1, 3)\n        size_res_loss = self.size_res_loss(\n            size_residual_norm,\n            size_res_targets,\n            weight=box_loss_weights_expand)\n\n        # calculate semantic loss\n        semantic_loss = self.semantic_loss(\n            bbox_preds['sem_scores'].transpose(2, 1),\n            mask_targets,\n            weight=box_loss_weights)\n\n        losses = dict(\n            vote_loss=vote_loss,\n            objectness_loss=objectness_loss,\n            semantic_loss=semantic_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_class_loss=size_class_loss,\n            size_res_loss=size_res_loss)\n\n        if self.iou_loss:\n            corners_pred = self.bbox_coder.decode_corners(\n                bbox_preds['center'], size_residual_norm,\n                one_hot_size_targets_expand)\n            corners_target = self.bbox_coder.decode_corners(\n                assigned_center_targets, size_res_targets,\n                one_hot_size_targets_expand)\n            iou_loss = self.iou_loss(\n                corners_pred, corners_target, weight=box_loss_weights)\n            losses['iou_loss'] = iou_loss\n\n        if ret_target:\n            losses['targets'] = targets\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of vote head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of vote head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of vote head.\n        \"\"\"\n        # find empty example\n        valid_gt_masks = list()\n        gt_num = list()\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = 
gt_labels_3d[index].new_zeros(1)\n                valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))\n                gt_num.append(1)\n            else:\n                valid_gt_masks.append(gt_labels_3d[index].new_ones(\n                    gt_labels_3d[index].shape))\n                gt_num.append(gt_labels_3d[index].shape[0])\n        max_gt_num = max(gt_num)\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        aggregated_points = [\n            bbox_preds['aggregated_points'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,\n         dir_class_targets, dir_res_targets, center_targets,\n         assigned_center_targets, mask_targets, objectness_targets,\n         objectness_masks) = multi_apply(self.get_targets_single, points,\n                                         gt_bboxes_3d, gt_labels_3d,\n                                         pts_semantic_mask, pts_instance_mask,\n                                         aggregated_points)\n\n        # pad targets as original code of votenet.\n        for index in range(len(gt_labels_3d)):\n            pad_num = max_gt_num - gt_labels_3d[index].shape[0]\n            center_targets[index] = F.pad(center_targets[index],\n                                          (0, 0, 0, pad_num))\n            valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))\n\n        vote_targets = torch.stack(vote_targets)\n        vote_target_masks = torch.stack(vote_target_masks)\n        center_targets = torch.stack(center_targets)\n        valid_gt_masks = torch.stack(valid_gt_masks)\n\n        assigned_center_targets = torch.stack(assigned_center_targets)\n        objectness_targets = torch.stack(objectness_targets)\n        objectness_weights = torch.stack(objectness_masks)\n        objectness_weights /= (torch.sum(objectness_weights) + 1e-6)\n        box_loss_weights = objectness_targets.float() / (\n            torch.sum(objectness_targets).float() + 1e-6)\n        valid_gt_weights = valid_gt_masks.float() / (\n            torch.sum(valid_gt_masks.float()) + 1e-6)\n        dir_class_targets = torch.stack(dir_class_targets)\n        dir_res_targets = torch.stack(dir_res_targets)\n        size_class_targets = torch.stack(size_class_targets)\n        size_res_targets = torch.stack(size_res_targets)\n        mask_targets = torch.stack(mask_targets)\n\n        return (vote_targets, vote_target_masks, size_class_targets,\n                size_res_targets, dir_class_targets, dir_res_targets,\n                center_targets, assigned_center_targets, mask_targets,\n                valid_gt_masks, objectness_targets, objectness_weights,\n                box_loss_weights, valid_gt_weights)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           aggregated_points=None):\n        \"\"\"Generate targets of vote head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            
pts_semantic_mask (torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (torch.Tensor): Point-wise instance\n                label of each batch.\n            aggregated_points (torch.Tensor): Aggregated points from\n                vote aggregation layer.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of vote head.\n        \"\"\"\n        assert self.bbox_coder.with_rot or pts_semantic_mask is not None\n\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n\n        # generate votes target\n        num_points = points.shape[0]\n        if self.bbox_coder.with_rot:\n            vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])\n            vote_target_masks = points.new_zeros([num_points],\n                                                 dtype=torch.long)\n            vote_target_idx = points.new_zeros([num_points], dtype=torch.long)\n            box_indices_all = gt_bboxes_3d.points_in_boxes_all(points)\n            for i in range(gt_labels_3d.shape[0]):\n                box_indices = box_indices_all[:, i]\n                indices = torch.nonzero(\n                    box_indices, as_tuple=False).squeeze(-1)\n                selected_points = points[indices]\n                vote_target_masks[indices] = 1\n                vote_targets_tmp = vote_targets[indices]\n                votes = gt_bboxes_3d.gravity_center[i].unsqueeze(\n                    0) - selected_points[:, :3]\n\n                for j in range(self.gt_per_seed):\n                    column_indices = torch.nonzero(\n                        vote_target_idx[indices] == j,\n                        as_tuple=False).squeeze(-1)\n                    vote_targets_tmp[column_indices,\n                                     int(j * 3):int(j * 3 +\n                                                    3)] = votes[column_indices]\n                    if j == 0:\n                        vote_targets_tmp[column_indices] = votes[\n                            column_indices].repeat(1, self.gt_per_seed)\n\n                vote_targets[indices] = vote_targets_tmp\n                vote_target_idx[indices] = torch.clamp(\n                    vote_target_idx[indices] + 1, max=2)\n        elif pts_semantic_mask is not None:\n            vote_targets = points.new_zeros([num_points, 3])\n            vote_target_masks = points.new_zeros([num_points],\n                                                 dtype=torch.long)\n\n            for i in torch.unique(pts_instance_mask):\n                indices = torch.nonzero(\n                    pts_instance_mask == i, as_tuple=False).squeeze(-1)\n                if pts_semantic_mask[indices[0]] < self.num_classes:\n                    selected_points = points[indices, :3]\n                    center = 0.5 * (\n                        selected_points.min(0)[0] + selected_points.max(0)[0])\n                    vote_targets[indices, :] = center - selected_points\n                    vote_target_masks[indices] = 1\n            vote_targets = vote_targets.repeat((1, self.gt_per_seed))\n        else:\n            raise NotImplementedError\n\n        (center_targets, size_class_targets, size_res_targets,\n         dir_class_targets,\n         dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)\n\n        proposal_num = aggregated_points.shape[0]\n        distance1, _, assignment, _ = chamfer_distance(\n            aggregated_points.unsqueeze(0),\n            center_targets.unsqueeze(0),\n            
reduction='none')\n        assignment = assignment.squeeze(0)\n        euclidean_distance1 = torch.sqrt(distance1.squeeze(0) + 1e-6)\n\n        objectness_targets = points.new_zeros((proposal_num), dtype=torch.long)\n        objectness_targets[\n            euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1\n\n        objectness_masks = points.new_zeros((proposal_num))\n        objectness_masks[\n            euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1.0\n        objectness_masks[\n            euclidean_distance1 > self.train_cfg['neg_distance_thr']] = 1.0\n\n        dir_class_targets = dir_class_targets[assignment]\n        dir_res_targets = dir_res_targets[assignment]\n        dir_res_targets /= (np.pi / self.num_dir_bins)\n        size_class_targets = size_class_targets[assignment]\n        size_res_targets = size_res_targets[assignment]\n\n        one_hot_size_targets = gt_bboxes_3d.tensor.new_zeros(\n            (proposal_num, self.num_sizes))\n        one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(\n            1, 1, 3)\n        mean_sizes = size_res_targets.new_tensor(\n            self.bbox_coder.mean_sizes).unsqueeze(0)\n        pos_mean_sizes = torch.sum(one_hot_size_targets * mean_sizes, 1)\n        size_res_targets /= pos_mean_sizes\n\n        mask_targets = gt_labels_3d[assignment]\n        assigned_center_targets = center_targets[assignment]\n\n        return (vote_targets, vote_target_masks, size_class_targets,\n                size_res_targets, dir_class_targets,\n                dir_res_targets, center_targets, assigned_center_targets,\n                mask_targets.long(), objectness_targets, objectness_masks)\n\n    def get_bboxes(self,\n                   points,\n                   bbox_preds,\n                   input_metas,\n                   rescale=False,\n                   use_nms=True):\n        \"\"\"Generate bboxes from vote head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from vote head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n            use_nms (bool): Whether to apply NMS, skip nms postprocessing\n                while using vote head in rpn stage.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # decode boxes\n        obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]\n        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)\n        bbox3d = self.bbox_coder.decode(bbox_preds)\n\n        if use_nms:\n            batch_size = bbox3d.shape[0]\n            results = list()\n            for b in range(batch_size):\n                bbox_selected, score_selected, labels = \\\n                    self.multiclass_nms_single(obj_scores[b], sem_scores[b],\n                                               bbox3d[b], points[b, ..., :3],\n                                               input_metas[b])\n                bbox = input_metas[b]['box_type_3d'](\n                    bbox_selected,\n                    box_dim=bbox_selected.shape[-1],\n                    with_yaw=self.bbox_coder.with_rot)\n                results.append((bbox, score_selected, labels))\n\n            return results\n        else:\n            return bbox3d\n\n    def multiclass_nms_single(self, obj_scores, 
sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n            points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        bbox = input_meta['box_type_3d'](\n            bbox,\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        box_indices = bbox.points_in_boxes_all(points)\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        nonempty_box_mask = box_indices.T.sum(1) > 5\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],\n                                      obj_scores[nonempty_box_mask],\n                                      bbox_classes[nonempty_box_mask],\n                                      self.test_cfg.nms_thr)\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores > self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_selected], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n            for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected] *\n                                      sem_scores[selected][:, k])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n"
  },
  {
    "path": "mmdet3d/models/detectors/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base import Base3DDetector\nfrom .bevdet import BEVDepth4D, BEVDet, BEVDet4D, BEVDetTRT\nfrom .centerpoint import CenterPoint\nfrom .dynamic_voxelnet import DynamicVoxelNet\nfrom .fcos_mono3d import FCOSMono3D\nfrom .groupfree3dnet import GroupFree3DNet\nfrom .h3dnet import H3DNet\nfrom .imvotenet import ImVoteNet\nfrom .imvoxelnet import ImVoxelNet\nfrom .mink_single_stage import MinkSingleStage3DDetector\nfrom .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN\nfrom .mvx_two_stage import MVXTwoStageDetector\nfrom .parta2 import PartA2\nfrom .point_rcnn import PointRCNN\nfrom .sassd import SASSD\nfrom .single_stage_mono3d import SingleStageMono3DDetector\nfrom .smoke_mono3d import SMOKEMono3D\nfrom .ssd3dnet import SSD3DNet\nfrom .votenet import VoteNet\nfrom .voxelnet import VoxelNet\n\n__all__ = [\n    'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',\n    'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',\n    'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',\n    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D',\n    'MinkSingleStage3DDetector', 'SASSD', 'BEVDet', 'BEVDet4D', 'BEVDepth4D',\n    'BEVDetTRT'\n]\n"
  },
  {
    "path": "mmdet3d/models/detectors/base.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nimport mmcv\nimport torch\nfrom mmcv.parallel import DataContainer as DC\nfrom mmcv.runner import auto_fp16\n\nfrom mmdet3d.core import Box3DMode, Coord3DMode, show_result\nfrom mmdet.models.detectors import BaseDetector\n\n\nclass Base3DDetector(BaseDetector):\n    \"\"\"Base class for detectors.\"\"\"\n\n    def forward_test(self, points, img_metas, img=None, **kwargs):\n        \"\"\"\n        Args:\n            points (list[torch.Tensor]): the outer list indicates test-time\n                augmentations and inner torch.Tensor should have a shape NxC,\n                which contains all points in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch\n            img (list[torch.Tensor], optional): the outer\n                list indicates test-time augmentations and inner\n                torch.Tensor should have a shape NxCxHxW, which contains\n                all images in the batch. Defaults to None.\n        \"\"\"\n        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError('{} must be a list, but got {}'.format(\n                    name, type(var)))\n\n        num_augs = len(points)\n        if num_augs != len(img_metas):\n            raise ValueError(\n                'num of augmentations ({}) != num of image meta ({})'.format(\n                    len(points), len(img_metas)))\n\n        if num_augs == 1:\n            img = [img] if img is None else img\n            return self.simple_test(points[0], img_metas[0], img[0], **kwargs)\n        else:\n            return self.aug_test(points, img_metas, img, **kwargs)\n\n    @auto_fp16(apply_to=('img', 'points'))\n    def forward(self, return_loss=True, **kwargs):\n        \"\"\"Calls either forward_train or forward_test depending on whether\n        return_loss=True.\n\n        Note this setting will change the expected inputs. When\n        `return_loss=True`, img and img_metas are single-nested (i.e.\n        torch.Tensor and list[dict]), and when `resturn_loss=False`, img and\n        img_metas should be double nested (i.e.  
list[torch.Tensor],\n        list[list[dict]]), with the outer list indicating test time\n        augmentations.\n        \"\"\"\n        if return_loss:\n            return self.forward_train(**kwargs)\n        else:\n            return self.forward_test(**kwargs)\n\n    def show_results(self, data, result, out_dir, show=False, score_thr=None):\n        \"\"\"Results visualization.\n\n        Args:\n            data (list[dict]): Input points and the information of the sample.\n            result (list[dict]): Prediction results.\n            out_dir (str): Output directory of visualization result.\n            show (bool, optional): Determines whether you are\n                going to show result by open3d.\n                Defaults to False.\n            score_thr (float, optional): Score threshold of bounding boxes.\n                Defaults to None.\n        \"\"\"\n        for batch_id in range(len(result)):\n            if isinstance(data['points'][0], DC):\n                points = data['points'][0]._data[0][batch_id].numpy()\n            elif mmcv.is_list_of(data['points'][0], torch.Tensor):\n                points = data['points'][0][batch_id]\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['points'][0])} \"\n                    f'for visualization!')\n            if isinstance(data['img_metas'][0], DC):\n                pts_filename = data['img_metas'][0]._data[0][batch_id][\n                    'pts_filename']\n                box_mode_3d = data['img_metas'][0]._data[0][batch_id][\n                    'box_mode_3d']\n            elif mmcv.is_list_of(data['img_metas'][0], dict):\n                pts_filename = data['img_metas'][0][batch_id]['pts_filename']\n                box_mode_3d = data['img_metas'][0][batch_id]['box_mode_3d']\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['img_metas'][0])} \"\n                    f'for visualization!')\n            file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n            assert out_dir is not None, 'Expect out_dir, got none.'\n\n            pred_bboxes = result[batch_id]['boxes_3d']\n            pred_labels = result[batch_id]['labels_3d']\n\n            if score_thr is not None:\n                mask = result[batch_id]['scores_3d'] > score_thr\n                pred_bboxes = pred_bboxes[mask]\n                pred_labels = pred_labels[mask]\n\n            # for now we convert points and bbox into depth mode\n            if (box_mode_3d == Box3DMode.CAM) or (box_mode_3d\n                                                  == Box3DMode.LIDAR):\n                points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                                   Coord3DMode.DEPTH)\n                pred_bboxes = Box3DMode.convert(pred_bboxes, box_mode_3d,\n                                                Box3DMode.DEPTH)\n            elif box_mode_3d != Box3DMode.DEPTH:\n                raise ValueError(\n                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')\n            pred_bboxes = pred_bboxes.tensor.cpu().numpy()\n            show_result(\n                points,\n                None,\n                pred_bboxes,\n                out_dir,\n                file_name,\n                show=show,\n                pred_labels=pred_labels)\n"
  },
  {
    "path": "mmdet3d/models/detectors/bevdet.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\nimport torch\nimport torch.nn.functional as F\nfrom mmcv.runner import force_fp32\n\nfrom mmdet3d.ops.bev_pool_v2.bev_pool import TRTBEVPoolv2\nfrom mmdet.models import DETECTORS\nfrom .. import builder\nfrom .centerpoint import CenterPoint\n\nfrom mmdet3d.core.bbox import (CameraInstance3DBoxes, DepthInstance3DBoxes,\n                               LiDARInstance3DBoxes, box_np_ops)\nimport torch\nfrom torchvision.utils import make_grid\nimport torchvision\nimport matplotlib.pyplot as plt\nimport cv2\ndef convert_color(img_path):\n    plt.figure()\n    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)\n    plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))\n    plt.close()\n\n\ndef save_tensor(tensor, path, pad_value=254.0,normalize=False):\n    print('save_tensor', path)\n    tensor = tensor.to(torch.float).detach().cpu()\n    max_ = tensor.flatten(1).max(-1).values[:, None, None]\n    min_ = tensor.flatten(1).min(-1).values[:, None, None]\n    tensor = (tensor-min_)/(max_-min_)\n    if tensor.type() == 'torch.BoolTensor':\n        tensor = tensor*255\n    if len(tensor.shape) == 3:\n        tensor = tensor.unsqueeze(1)\n    tensor = make_grid(tensor, pad_value=pad_value, normalize=normalize).permute(1, 2, 0).numpy().copy()\n    torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)\n    convert_color(path)\n\n@DETECTORS.register_module()\nclass BEVDet(CenterPoint):\n\n    def __init__(self, img_view_transformer, img_bev_encoder_backbone,\n                 img_bev_encoder_neck, **kwargs):\n        super(BEVDet, self).__init__(**kwargs)\n        self.img_view_transformer = builder.build_neck(img_view_transformer)\n        self.img_bev_encoder_backbone = \\\n            builder.build_backbone(img_bev_encoder_backbone)\n        self.img_bev_encoder_neck = builder.build_neck(img_bev_encoder_neck)\n\n    def image_encoder(self, img):\n        imgs = img\n        B, N, C, imH, imW = imgs.shape\n        imgs = imgs.view(B * N, C, imH, imW)\n        x = self.img_backbone(imgs)\n        if self.with_img_neck:\n            x = self.img_neck(x)\n            if type(x) in [list, tuple]:\n                x = x[0]\n        _, output_dim, ouput_H, output_W = x.shape\n        x = x.view(B, N, output_dim, ouput_H, output_W)\n        return x\n\n    @force_fp32()\n    def bev_encoder(self, x):\n        x = self.img_bev_encoder_backbone(x)\n        x = self.img_bev_encoder_neck(x)\n        if type(x) in [list, tuple]:\n            x = x[0]\n        return x\n\n    def extract_img_feat(self, img, img_metas, **kwargs):\n        \"\"\"Extract features of images.\"\"\"\n        x = self.image_encoder(img[0])\n        x, depth = self.img_view_transformer([x] + img[1:7])\n        # from IPython import embed\n        # embed()\n        # exit()\n\n        x = self.bev_encoder(x)\n        return [x], depth\n\n    def extract_feat(self, points, img, img_metas, **kwargs):\n        \"\"\"Extract features from images and points.\"\"\"\n        img_feats, depth = self.extract_img_feat(img, img_metas, **kwargs)\n        pts_feats = None\n        return (img_feats, pts_feats, depth)\n\n    def forward_train(self,\n                      points=None,\n                      img_metas=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      gt_labels=None,\n                      gt_bboxes=None,\n                      img_inputs=None,\n                      proposals=None,\n     
                 gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"Forward training function.\n\n        Args:\n            points (list[torch.Tensor], optional): Points of each sample.\n                Defaults to None.\n            img_metas (list[dict], optional): Meta information of each sample.\n                Defaults to None.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth 3D boxes. Defaults to None.\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\n                of 3D boxes. Defaults to None.\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\n                of 2D boxes in images. Defaults to None.\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\n                images. Defaults to None.\n            img (torch.Tensor optional): Images of each sample with shape\n                (N, C, H, W). Defaults to None.\n            proposals ([list[torch.Tensor], optional): Predicted proposals\n                used for training Fast RCNN. Defaults to None.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                2D boxes in images to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of different branches.\n        \"\"\"\n        img_feats, pts_feats, _ = self.extract_feat(\n            points, img=img_inputs, img_metas=img_metas, **kwargs)\n        losses = dict()\n        losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d,\n                                            gt_labels_3d, img_metas,\n                                            gt_bboxes_ignore)\n        losses.update(losses_pts)\n        return losses\n\n    def forward_test(self,\n                     points=None,\n                     img_metas=None,\n                     img_inputs=None,\n                     **kwargs):\n        \"\"\"\n        Args:\n            points (list[torch.Tensor]): the outer list indicates test-time\n                augmentations and inner torch.Tensor should have a shape NxC,\n                which contains all points in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch\n            img (list[torch.Tensor], optional): the outer\n                list indicates test-time augmentations and inner\n                torch.Tensor should have a shape NxCxHxW, which contains\n                all images in the batch. 
Defaults to None.\n        \"\"\"\n        for var, name in [(img_inputs, 'img_inputs'),\n                          (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError('{} must be a list, but got {}'.format(\n                    name, type(var)))\n\n        num_augs = len(img_inputs)\n        if num_augs != len(img_metas):\n            raise ValueError(\n                'num of augmentations ({}) != num of image meta ({})'.format(\n                    len(img_inputs), len(img_metas)))\n\n        if not isinstance(img_inputs[0][0], list):\n            img_inputs = [img_inputs] if img_inputs is None else img_inputs\n            points = [points] if points is None else points\n            return self.simple_test(points[0], img_metas[0], img_inputs[0],\n                                    **kwargs)\n        else:\n            return self.aug_test(None, img_metas[0], img_inputs[0], **kwargs)\n\n    def aug_test(self, points, img_metas, img=None, rescale=False):\n        \"\"\"Test function with augmentation (not supported).\"\"\"\n        assert False\n\n    def simple_test(self,\n                    points,\n                    img_metas,\n                    img=None,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test function without augmentation.\"\"\"\n        img_feats, _, _ = self.extract_feat(\n            points, img=img, img_metas=img_metas, **kwargs)\n\n        bbox_list = [dict() for _ in range(len(img_metas))]\n        bbox_pts = self.simple_test_pts(img_feats, img_metas, rescale=rescale)\n\n        for result_dict, pts_bbox in zip(bbox_list, bbox_pts):\n            pts_bbox['index'] = img_metas[0]['index']\n            result_dict['pts_bbox'] = pts_bbox\n        return bbox_list\n\n    def forward_dummy(self,\n                      points=None,\n                      img_metas=None,\n                      img_inputs=None,\n                      **kwargs):\n        img_feats, _, _ = self.extract_feat(\n            points, img=img_inputs, img_metas=img_metas, **kwargs)\n\n        assert self.with_pts_bbox\n        outs = self.pts_bbox_head(img_feats)\n        return outs\n\n\n@DETECTORS.register_module()\nclass BEVDetTRT(BEVDet):\n\n    def result_serialize(self, outs):\n        outs_ = []\n        for out in \\\n
outs:\n            for key in ['reg', 'height', 'dim', 'rot', 'vel', 'heatmap']:\n                outs_.append(out[0][key])\n        return outs_\n\n    def result_deserialize(self, outs):\n        outs_ = []\n        keys = ['reg', 'height', 'dim', 'rot', 'vel', 'heatmap']\n        for head_id in range(len(outs) // 6):\n            outs_head = [dict()]\n            for kid, key in enumerate(keys):\n                outs_head[0][key] = outs[head_id * 6 + kid]\n            outs_.append(outs_head)\n        return outs_\n\n    def forward(\n        self,\n        img,\n        ranks_depth,\n        ranks_feat,\n        ranks_bev,\n        interval_starts,\n        interval_lengths,\n    ):\n        x = self.img_backbone(img)\n        x = self.img_neck(x)\n        x = self.img_view_transformer.depth_net(x)\n        depth = x[:, :self.img_view_transformer.D].softmax(dim=1)\n        tran_feat = x[:, self.img_view_transformer.D:(\n            self.img_view_transformer.D +\n            self.img_view_transformer.out_channels)]\n        tran_feat = tran_feat.permute(0, 2, 3, 1)\n        x = TRTBEVPoolv2.apply(depth.contiguous(), tran_feat.contiguous(),\n                               ranks_depth, ranks_feat, ranks_bev,\n                               interval_starts, interval_lengths)\n        x = x.permute(0, 3, 1, 2).contiguous()\n        bev_feat = self.bev_encoder(x)\n        outs = self.pts_bbox_head([bev_feat])\n        outs = self.result_serialize(outs)\n        return outs\n\n    def get_bev_pool_input(self, input):\n        coor = self.img_view_transformer.get_lidar_coor(*input[1:7])\n        return self.img_view_transformer.voxel_pooling_prepare_v2(coor)\n\n\n@DETECTORS.register_module()\nclass BEVDet4D(BEVDet):\n\n    def __init__(self,\n                 pre_process=None,\n                 align_after_view_transfromation=False,\n                 num_adj=1,\n                 with_prev=True,\n                 use_depth_supervision = True,\n                 **kwargs):\n        super(BEVDet4D, self).__init__(**kwargs)\n        self.pre_process = pre_process is not None\n        if self.pre_process:\n            self.pre_process_net = builder.build_backbone(pre_process)\n        self.align_after_view_transfromation = align_after_view_transfromation\n        self.num_frame = num_adj + 1\n\n        self.with_prev = with_prev\n        self.use_depth_supervision = use_depth_supervision\n    @force_fp32()\n    def shift_feature(self, input, trans, rots, bda, bda_adj=None):\n        n, c, h, w = input.shape\n        _, v, _ = trans[0].shape\n\n        # generate grid\n        xs = torch.linspace(\n            0, w - 1, w, dtype=input.dtype,\n            device=input.device).view(1, w).expand(h, w)\n        ys = torch.linspace(\n            0, h - 1, h, dtype=input.dtype,\n            device=input.device).view(h, 1).expand(h, w)\n        grid = torch.stack((xs, ys, torch.ones_like(xs)), -1)\n        grid = grid.view(1, h, w, 3).expand(n, h, w, 3).view(n, h, w, 3, 1)\n\n        # get transformation from current ego frame to adjacent ego frame\n        # transformation from current camera frame to current ego frame\n        c02l0 = torch.zeros((n, 1, 4, 4), dtype=grid.dtype).to(grid)\n        c02l0[:, :, :3, :3] = rots[0][:, 0:1, :, :]\n        c02l0[:, :, :3, 3] = trans[0][:, 0:1, :]\n        c02l0[:, :, 3, 3] = 1\n\n        # transformation from adjacent camera frame to current ego frame\n        c12l0 = torch.zeros((n, 1, 4, 4), dtype=grid.dtype).to(grid)\n        c12l0[:, :, :3, :3] = rots[1][:, 
0:1, :, :]\n        c12l0[:, :, :3, 3] = trans[1][:, 0:1, :]\n        c12l0[:, :, 3, 3] = 1\n\n        # add bev data augmentation\n        bda_ = torch.zeros((n, 1, 4, 4), dtype=grid.dtype).to(grid)\n        bda_[:, :, :3, :3] = bda.unsqueeze(1)\n        bda_[:, :, 3, 3] = 1\n        c02l0 = bda_.matmul(c02l0)\n        if bda_adj is not None:\n            bda_ = torch.zeros((n, 1, 4, 4), dtype=grid.dtype).to(grid)\n            bda_[:, :, :3, :3] = bda_adj.unsqueeze(1)\n            bda_[:, :, 3, 3] = 1\n        c12l0 = bda_.matmul(c12l0)\n\n        # transformation from current ego frame to adjacent ego frame\n        l02l1 = c02l0.matmul(torch.inverse(c12l0))[:, 0, :, :].view(\n            n, 1, 1, 4, 4)\n        '''\n          c02l0 * inv(c12l0)\n        = c02l0 * inv(l12l0 * c12l1)\n        = c02l0 * inv(c12l1) * inv(l12l0)\n        = l02l1 # c02l0==c12l1\n        '''\n\n        l02l1 = l02l1[:, :, :,\n                      [True, True, False, True], :][:, :, :, :,\n                                                    [True, True, False, True]]\n\n        feat2bev = torch.zeros((3, 3), dtype=grid.dtype).to(grid)\n        feat2bev[0, 0] = self.img_view_transformer.grid_interval[0]\n        feat2bev[1, 1] = self.img_view_transformer.grid_interval[1]\n        feat2bev[0, 2] = self.img_view_transformer.grid_lower_bound[0]\n        feat2bev[1, 2] = self.img_view_transformer.grid_lower_bound[1]\n        feat2bev[2, 2] = 1\n        feat2bev = feat2bev.view(1, 3, 3)\n        tf = torch.inverse(feat2bev).matmul(l02l1).matmul(feat2bev)\n\n        # transform and normalize\n        grid = tf.matmul(grid)\n        normalize_factor = torch.tensor([w - 1.0, h - 1.0],\n                                        dtype=input.dtype,\n                                        device=input.device)\n        grid = grid[:, :, :, :2, 0] / normalize_factor.view(1, 1, 1,\n                                                            2) * 2.0 - 1.0\n        output = F.grid_sample(input, grid.to(input.dtype), align_corners=True)\n        return output\n\n    def prepare_bev_feat(self, img, rot, tran, intrin, post_rot, post_tran,\n                         bda, mlp_input):\n        x = self.image_encoder(img)\n        bev_feat, depth = self.img_view_transformer(\n            [x, rot, tran, intrin, post_rot, post_tran, bda, mlp_input])\n        if self.pre_process:\n            bev_feat = self.pre_process_net(bev_feat)[0]\n        return bev_feat, depth\n\n    def extract_img_feat_sequential(self, inputs, feat_prev):\n        imgs, rots_curr, trans_curr, intrins = inputs[:4]\n        rots_prev, trans_prev, post_rots, post_trans, bda = inputs[4:]\n        bev_feat_list = []\n        mlp_input = self.img_view_transformer.get_mlp_input(\n            rots_curr[0:1, ...], trans_curr[0:1, ...], intrins, post_rots,\n            post_trans, bda[0:1, ...])\n        inputs_curr = (imgs, rots_curr[0:1, ...], trans_curr[0:1, ...],\n                       intrins, post_rots, post_trans, bda[0:1,\n                                                           ...], mlp_input)\n        bev_feat, depth = self.prepare_bev_feat(*inputs_curr)\n        bev_feat_list.append(bev_feat)\n\n        # align the feat_prev\n        _, C, H, W = feat_prev.shape\n        feat_prev = \\\n            self.shift_feature(feat_prev,\n                               [trans_curr, trans_prev],\n                               [rots_curr, rots_prev],\n                               bda)\n        bev_feat_list.append(feat_prev.view(1, (self.num_frame - 1) * C, H, 
W))\n\n        bev_feat = torch.cat(bev_feat_list, dim=1)\n        x = self.bev_encoder(bev_feat)\n        return [x], depth\n\n    def prepare_inputs(self, inputs):\n        # split the inputs into each frame\n        B, N, _, H, W = inputs[0].shape\n        N = N // self.num_frame\n        imgs = inputs[0].view(B, N, self.num_frame, 3, H, W)\n        imgs = torch.split(imgs, 1, 2)\n        imgs = [t.squeeze(2) for t in imgs]\n        rots, trans, intrins, post_rots, post_trans, bda = inputs[1:7]\n        extra = [\n            rots.view(B, self.num_frame, N, 3, 3),\n            trans.view(B, self.num_frame, N, 3),\n            intrins.view(B, self.num_frame, N, 3, 3),\n            post_rots.view(B, self.num_frame, N, 3, 3),\n            post_trans.view(B, self.num_frame, N, 3)\n        ]\n        extra = [torch.split(t, 1, 1) for t in extra]\n        extra = [[p.squeeze(1) for p in t] for t in extra]\n        rots, trans, intrins, post_rots, post_trans = extra\n        return imgs, rots, trans, intrins, post_rots, post_trans, bda\n\n    def extract_img_feat(self,\n                         img,\n                         img_metas,\n                         pred_prev=False,\n                         sequential=False,\n                         **kwargs):\n\n        if sequential:\n            return self.extract_img_feat_sequential(img, kwargs['feat_prev'])\n        imgs, rots, trans, intrins, post_rots, post_trans, bda = \\\n            self.prepare_inputs(img)\n        \"\"\"Extract features of images.\"\"\"\n        bev_feat_list = []\n        depth_list = []\n        key_frame = True  # back propagation for key frame only\n        for img, rot, tran, intrin, post_rot, post_tran in zip(\n                imgs, rots, trans, intrins, post_rots, post_trans):\n            if key_frame or self.with_prev:\n                if self.align_after_view_transfromation:\n                    rot, tran = rots[0], trans[0]\n                mlp_input = self.img_view_transformer.get_mlp_input(\n                    rots[0], trans[0], intrin, post_rot, post_tran, bda)\n                inputs_curr = (img, rot, tran, intrin, post_rot,\n                               post_tran, bda, mlp_input)\n                if key_frame:\n                    bev_feat, depth = self.prepare_bev_feat(*inputs_curr)\n                else:\n                    with torch.no_grad():\n                        bev_feat, depth = self.prepare_bev_feat(*inputs_curr)\n            else:\n                bev_feat = torch.zeros_like(bev_feat_list[0])\n                depth = None\n            bev_feat_list.append(bev_feat)\n            depth_list.append(depth)\n            key_frame = False\n        if pred_prev:\n            assert self.align_after_view_transfromation\n            assert rots[0].shape[0] == 1\n            feat_prev = torch.cat(bev_feat_list[1:], dim=0)\n            trans_curr = trans[0].repeat(self.num_frame - 1, 1, 1)\n            rots_curr = rots[0].repeat(self.num_frame - 1, 1, 1, 1)\n            trans_prev = torch.cat(trans[1:], dim=0)\n            rots_prev = torch.cat(rots[1:], dim=0)\n            bda_curr = bda.repeat(self.num_frame - 1, 1, 1)\n            return feat_prev, [\n                imgs[0], rots_curr, trans_curr, intrins[0], rots_prev,\n                trans_prev, post_rots[0], post_trans[0], bda_curr\n            ]\n        if self.align_after_view_transfromation:\n            for adj_id in range(1, self.num_frame):\n                bev_feat_list[adj_id] = \\\n                    
self.shift_feature(bev_feat_list[adj_id],\n                                       [trans[0], trans[adj_id]],\n                                       [rots[0], rots[adj_id]],\n                                       bda)\n        bev_feat = torch.cat(bev_feat_list, dim=1)\n        x = self.bev_encoder(bev_feat)\n        return [x], depth_list[0]\n\n\n@DETECTORS.register_module()\nclass BEVDepth4D(BEVDet4D):\n\n    def forward_train(self,\n                      points=None,\n                      img_metas=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      gt_labels=None,\n                      gt_bboxes=None,\n                      img_inputs=None,\n                      proposals=None,\n                      gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"Forward training function.\n\n        Args:\n            points (list[torch.Tensor], optional): Points of each sample.\n                Defaults to None.\n            img_metas (list[dict], optional): Meta information of each sample.\n                Defaults to None.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth 3D boxes. Defaults to None.\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\n                of 3D boxes. Defaults to None.\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\n                of 2D boxes in images. Defaults to None.\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\n                images. Defaults to None.\n            img (torch.Tensor optional): Images of each sample with shape\n                (N, C, H, W). Defaults to None.\n            proposals ([list[torch.Tensor], optional): Predicted proposals\n                used for training Fast RCNN. Defaults to None.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                2D boxes in images to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of different branches.\n        \"\"\"\n        img_feats, pts_feats, depth = self.extract_feat(\n            points, img=img_inputs, img_metas=img_metas, **kwargs)\n        gt_depth = kwargs['gt_depth']\n        loss_depth = self.img_view_transformer.get_depth_loss(gt_depth, depth)\n        if not self.use_depth_supervision:\n            loss_depth = loss_depth * 0\n        losses = dict(loss_depth=loss_depth)\n        losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d,\n                                            gt_labels_3d, img_metas,\n                                            gt_bboxes_ignore)\n\n        losses.update(losses_pts)\n        return losses\n"
  },
  {
    "path": "mmdet3d/models/detectors/centerpoint.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom ..builder import DETECTORS\nfrom .mvx_two_stage import MVXTwoStageDetector\n\n\n@DETECTORS.register_module()\nclass CenterPoint(MVXTwoStageDetector):\n    \"\"\"Base class of Multi-modality VoxelNet.\"\"\"\n\n    def __init__(self,\n                 pts_voxel_layer=None,\n                 pts_voxel_encoder=None,\n                 pts_middle_encoder=None,\n                 pts_fusion_layer=None,\n                 img_backbone=None,\n                 pts_backbone=None,\n                 img_neck=None,\n                 pts_neck=None,\n                 pts_bbox_head=None,\n                 img_roi_head=None,\n                 img_rpn_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(CenterPoint,\n              self).__init__(pts_voxel_layer, pts_voxel_encoder,\n                             pts_middle_encoder, pts_fusion_layer,\n                             img_backbone, pts_backbone, img_neck, pts_neck,\n                             pts_bbox_head, img_roi_head, img_rpn_head,\n                             train_cfg, test_cfg, pretrained, init_cfg)\n\n    @property\n    def with_velocity(self):\n        \"\"\"bool: Whether the head predicts velocity\"\"\"\n        return self.pts_bbox_head is not None and \\\n            self.pts_bbox_head.with_velocity\n\n    def extract_pts_feat(self, pts, img_feats, img_metas):\n        \"\"\"Extract features of points.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n\n        voxels, num_points, coors = self.voxelize(pts)\n\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n        return x\n\n    def forward_pts_train(self,\n                          pts_feats,\n                          gt_bboxes_3d,\n                          gt_labels_3d,\n                          img_metas,\n                          gt_bboxes_ignore=None):\n        \"\"\"Forward function for point cloud branch.\n\n        Args:\n            pts_feats (list[torch.Tensor]): Features of point cloud branch\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sampole\n            img_metas (list[dict]): Meta information of samples.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        outs = self.pts_bbox_head(pts_feats)\n        loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs]\n        losses = self.pts_bbox_head.loss(*loss_inputs)\n        return losses\n\n    def simple_test_pts(self, x, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch.\"\"\"\n        outs = self.pts_bbox_head(x)\n        bbox_list = self.pts_bbox_head.get_bboxes(\n            outs, img_metas, rescale=rescale)\n        return bbox_list\n        # bbox_results = [\n        #     bbox3d2result(bboxes, scores, labels)\n        #     for bboxes, scores, labels in bbox_list\n        # ]\n        # return bbox_results\n\n    def aug_test_pts(self, feats, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch with augmentaiton.\n\n        The function implementation process is as follows:\n\n            - step 1: map features back for double-flip augmentation.\n            - step 2: merge all features and generate boxes.\n            - step 3: map boxes back for scale augmentation.\n            - step 4: merge results.\n\n        Args:\n            feats (list[torch.Tensor]): Feature of point cloud.\n            img_metas (list[dict]): Meta information of samples.\n            rescale (bool, optional): Whether to rescale bboxes.\n                Default: False.\n\n        Returns:\n            dict: Returned bboxes consists of the following keys:\n\n                - boxes_3d (:obj:`LiDARInstance3DBoxes`): Predicted bboxes.\n                - scores_3d (torch.Tensor): Scores of predicted boxes.\n                - labels_3d (torch.Tensor): Labels of predicted boxes.\n        \"\"\"\n        # only support aug_test for one sample\n        outs_list = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.pts_bbox_head(x)\n            # merge augmented outputs before decoding bboxes\n            for task_id, out in enumerate(outs):\n                for key in out[0].keys():\n                    if img_meta[0]['pcd_horizontal_flip']:\n                        outs[task_id][0][key] = torch.flip(\n                            outs[task_id][0][key], dims=[2])\n                        if key == 'reg':\n                            outs[task_id][0][key][:, 1, ...] = 1 - outs[\n                                task_id][0][key][:, 1, ...]\n                        elif key == 'rot':\n                            outs[task_id][0][\n                                key][:, 0,\n                                     ...] = -outs[task_id][0][key][:, 0, ...]\n                        elif key == 'vel':\n                            outs[task_id][0][\n                                key][:, 1,\n                                     ...] = -outs[task_id][0][key][:, 1, ...]\n                    if img_meta[0]['pcd_vertical_flip']:\n                        outs[task_id][0][key] = torch.flip(\n                            outs[task_id][0][key], dims=[3])\n                        if key == 'reg':\n                            outs[task_id][0][key][:, 0, ...] = 1 - outs[\n                                task_id][0][key][:, 0, ...]\n                        elif key == 'rot':\n                            outs[task_id][0][\n                                key][:, 1,\n                                     ...] 
= -outs[task_id][0][key][:, 1, ...]\n                        elif key == 'vel':\n                            outs[task_id][0][\n                                key][:, 0,\n                                     ...] = -outs[task_id][0][key][:, 0, ...]\n\n            outs_list.append(outs)\n\n        preds_dicts = dict()\n        scale_img_metas = []\n\n        # concat outputs sharing the same pcd_scale_factor\n        for i, (img_meta, outs) in enumerate(zip(img_metas, outs_list)):\n            pcd_scale_factor = img_meta[0]['pcd_scale_factor']\n            if pcd_scale_factor not in preds_dicts.keys():\n                preds_dicts[pcd_scale_factor] = outs\n                scale_img_metas.append(img_meta)\n            else:\n                for task_id, out in enumerate(outs):\n                    for key in out[0].keys():\n                        preds_dicts[pcd_scale_factor][task_id][0][key] += out[\n                            0][key]\n\n        aug_bboxes = []\n\n        for pcd_scale_factor, preds_dict in preds_dicts.items():\n            for task_id, pred_dict in enumerate(preds_dict):\n                # merge outputs with different flips before decoding bboxes\n                for key in pred_dict[0].keys():\n                    preds_dict[task_id][0][key] /= len(outs_list) / len(\n                        preds_dicts.keys())\n            bbox_list = self.pts_bbox_head.get_bboxes(\n                preds_dict, img_metas[0], rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        if len(preds_dicts.keys()) > 1:\n            # merge outputs with different scales after decoding bboxes\n            merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, scale_img_metas,\n                                                self.pts_bbox_head.test_cfg)\n            return merged_bboxes\n        else:\n            for key in bbox_list[0].keys():\n                bbox_list[0][key] = bbox_list[0][key].to('cpu')\n            return bbox_list[0]\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentaiton.\"\"\"\n        img_feats, pts_feats = self.extract_feats(points, img_metas, imgs)\n        bbox_list = dict()\n        if pts_feats and self.with_pts_bbox:\n            pts_bbox = self.aug_test_pts(pts_feats, img_metas, rescale)\n            bbox_list.update(pts_bbox=pts_bbox)\n        return [bbox_list]\n"
  },
  {
    "path": "mmdet3d/models/detectors/dynamic_voxelnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom ..builder import DETECTORS\nfrom .voxelnet import VoxelNet\n\n\n@DETECTORS.register_module()\nclass DynamicVoxelNet(VoxelNet):\n    r\"\"\"VoxelNet using `dynamic voxelization\n        <https://arxiv.org/abs/1910.06528>`_.\n    \"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(DynamicVoxelNet, self).__init__(\n            voxel_layer=voxel_layer,\n            voxel_encoder=voxel_encoder,\n            middle_encoder=middle_encoder,\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n\n    def extract_feat(self, points, img_metas):\n        \"\"\"Extract features from points.\"\"\"\n        voxels, coors = self.voxelize(points)\n        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)\n        batch_size = coors[-1, 0].item() + 1\n        x = self.middle_encoder(voxel_features, feature_coors, batch_size)\n        x = self.backbone(x)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points and coordinates.\n        \"\"\"\n        coors = []\n        # dynamic voxelization only provide a coors mapping\n        for res in points:\n            res_coors = self.voxel_layer(res)\n            coors.append(res_coors)\n        points = torch.cat(points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return points, coors_batch\n"
  },
  {
    "path": "mmdet3d/models/detectors/fcos_mono3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage_mono3d import SingleStageMono3DDetector\n\n\n@DETECTORS.register_module()\nclass FCOSMono3D(SingleStageMono3DDetector):\n    r\"\"\"`FCOS3D <https://arxiv.org/abs/2104.10956>`_ for monocular 3D object detection.\n\n    Currently please refer to our entry on the\n    `leaderboard <https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Camera>`_.\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(FCOSMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                         test_cfg, pretrained)\n"
  },
  {
    "path": "mmdet3d/models/detectors/groupfree3dnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStage3DDetector\n\n\n@DETECTORS.register_module()\nclass GroupFree3DNet(SingleStage3DDetector):\n    \"\"\"`Group-Free 3D <https://arxiv.org/abs/2104.00678>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(GroupFree3DNet, self).__init__(\n            backbone=backbone,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained)\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img_metas (list): Image metas.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): point-wise instance\n                label of each batch.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict[str: torch.Tensor]: Losses.\n        \"\"\"\n        # TODO: refactor votenet series to reduce redundant codes.\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)\n        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,\n                       pts_instance_mask, img_metas)\n        losses = self.bbox_head.loss(\n            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Forward of testing.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list): Image metas.\n            rescale (bool): Whether to rescale results.\n        Returns:\n            list: Predicted 3d boxes.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)\n        bbox_list = self.bbox_head.get_bboxes(\n            points_cat, bbox_preds, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test with augmentation.\"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats = self.extract_feats(points_cat, img_metas)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, pts_cat, img_meta in zip(feats, points_cat, img_metas):\n            bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)\n 
           bbox_list = self.bbox_head.get_bboxes(\n                pts_cat, bbox_preds, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/detectors/h3dnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core import merge_aug_bboxes_3d\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStage3DDetector\n\n\n@DETECTORS.register_module()\nclass H3DNet(TwoStage3DDetector):\n    r\"\"\"H3DNet model.\n\n    Please refer to the `paper <https://arxiv.org/abs/2006.05682>`_\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(H3DNet, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img_metas (list): Image metas.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): point-wise instance\n                label of each batch.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        feats_dict = self.extract_feat(points_cat)\n        feats_dict['fp_xyz'] = [feats_dict['fp_xyz_net0'][-1]]\n        feats_dict['fp_features'] = [feats_dict['hd_feature']]\n        feats_dict['fp_indices'] = [feats_dict['fp_indices_net0'][-1]]\n\n        losses = dict()\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(feats_dict, self.train_cfg.rpn.sample_mod)\n            feats_dict.update(rpn_outs)\n\n            rpn_loss_inputs = (points, gt_bboxes_3d, gt_labels_3d,\n                               pts_semantic_mask, pts_instance_mask, img_metas)\n            rpn_losses = self.rpn_head.loss(\n                rpn_outs,\n                *rpn_loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                ret_target=True)\n            feats_dict['targets'] = rpn_losses.pop('targets')\n            losses.update(rpn_losses)\n\n            # Generate rpn proposals\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            proposal_inputs = (points, rpn_outs, img_metas)\n            proposal_list = self.rpn_head.get_bboxes(\n                *proposal_inputs, use_nms=proposal_cfg.use_nms)\n            feats_dict['proposal_list'] = proposal_list\n        else:\n            raise NotImplementedError\n\n        roi_losses = self.roi_head.forward_train(feats_dict, img_metas, points,\n                                                 gt_bboxes_3d, gt_labels_3d,\n                                             
    pts_semantic_mask,\n                                                 pts_instance_mask,\n                                                 gt_bboxes_ignore)\n        losses.update(roi_losses)\n\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Forward of testing.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list): Image metas.\n            rescale (bool): Whether to rescale results.\n\n        Returns:\n            list: Predicted 3d boxes.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        feats_dict = self.extract_feat(points_cat)\n        feats_dict['fp_xyz'] = [feats_dict['fp_xyz_net0'][-1]]\n        feats_dict['fp_features'] = [feats_dict['hd_feature']]\n        feats_dict['fp_indices'] = [feats_dict['fp_indices_net0'][-1]]\n\n        if self.with_rpn:\n            proposal_cfg = self.test_cfg.rpn\n            rpn_outs = self.rpn_head(feats_dict, proposal_cfg.sample_mod)\n            feats_dict.update(rpn_outs)\n            # Generate rpn proposals\n            proposal_list = self.rpn_head.get_bboxes(\n                points, rpn_outs, img_metas, use_nms=proposal_cfg.use_nms)\n            feats_dict['proposal_list'] = proposal_list\n        else:\n            raise NotImplementedError\n\n        return self.roi_head.simple_test(\n            feats_dict, img_metas, points_cat, rescale=rescale)\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test with augmentation.\"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats_dict = self.extract_feats(points_cat, img_metas)\n        for feat_dict in feats_dict:\n            feat_dict['fp_xyz'] = [feat_dict['fp_xyz_net0'][-1]]\n            feat_dict['fp_features'] = [feat_dict['hd_feature']]\n            feat_dict['fp_indices'] = [feat_dict['fp_indices_net0'][-1]]\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for feat_dict, pts_cat, img_meta in zip(feats_dict, points_cat,\n                                                img_metas):\n            if self.with_rpn:\n                proposal_cfg = self.test_cfg.rpn\n                rpn_outs = self.rpn_head(feat_dict, proposal_cfg.sample_mod)\n                feat_dict.update(rpn_outs)\n                # Generate rpn proposals\n                proposal_list = self.rpn_head.get_bboxes(\n                    points, rpn_outs, img_metas, use_nms=proposal_cfg.use_nms)\n                feat_dict['proposal_list'] = proposal_list\n            else:\n                raise NotImplementedError\n\n            bbox_results = self.roi_head.simple_test(\n                feat_dict,\n                self.test_cfg.rcnn.sample_mod,\n                img_meta,\n                pts_cat,\n                rescale=rescale)\n            aug_bboxes.append(bbox_results)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n\n    def extract_feats(self, points, img_metas):\n        \"\"\"Extract features of multiple samples.\"\"\"\n        return [\n            self.extract_feat(pts, img_meta)\n            for pts, img_meta in zip(points, img_metas)\n        ]\n"
  },
  {
    "path": "mmdet3d/models/detectors/imvotenet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.utils import MLP\nfrom .. import builder\nfrom ..builder import DETECTORS\nfrom .base import Base3DDetector\n\n\ndef sample_valid_seeds(mask, num_sampled_seed=1024):\n    r\"\"\"Randomly sample seeds from all imvotes.\n\n    Modified from `<https://github.com/facebookresearch/imvotenet/blob/a8856345146bacf29a57266a2f0b874406fd8823/models/imvotenet.py#L26>`_\n\n    Args:\n        mask (torch.Tensor): Bool tensor in shape (\n            seed_num*max_imvote_per_pixel), indicates\n            whether this imvote corresponds to a 2D bbox.\n        num_sampled_seed (int): How many to sample from all imvotes.\n\n    Returns:\n        torch.Tensor: Indices with shape (num_sampled_seed).\n    \"\"\"  # noqa: E501\n    device = mask.device\n    batch_size = mask.shape[0]\n    sample_inds = mask.new_zeros((batch_size, num_sampled_seed),\n                                 dtype=torch.int64)\n    for bidx in range(batch_size):\n        # return index of non zero elements\n        valid_inds = torch.nonzero(mask[bidx, :]).squeeze(-1)\n        if len(valid_inds) < num_sampled_seed:\n            # compute set t1 - t2\n            t1 = torch.arange(num_sampled_seed, device=device)\n            t2 = valid_inds % num_sampled_seed\n            combined = torch.cat((t1, t2))\n            uniques, counts = combined.unique(return_counts=True)\n            difference = uniques[counts == 1]\n\n            rand_inds = torch.randperm(\n                len(difference),\n                device=device)[:num_sampled_seed - len(valid_inds)]\n            cur_sample_inds = difference[rand_inds]\n            cur_sample_inds = torch.cat((valid_inds, cur_sample_inds))\n        else:\n            rand_inds = torch.randperm(\n                len(valid_inds), device=device)[:num_sampled_seed]\n            cur_sample_inds = valid_inds[rand_inds]\n        sample_inds[bidx, :] = cur_sample_inds\n    return sample_inds\n\n\n@DETECTORS.register_module()\nclass ImVoteNet(Base3DDetector):\n    r\"\"\"`ImVoteNet <https://arxiv.org/abs/2001.10692>`_ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 pts_backbone=None,\n                 pts_bbox_heads=None,\n                 pts_neck=None,\n                 img_backbone=None,\n                 img_neck=None,\n                 img_roi_head=None,\n                 img_rpn_head=None,\n                 img_mlp=None,\n                 freeze_img_branch=False,\n                 fusion_layer=None,\n                 num_sampled_seed=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n\n        super(ImVoteNet, self).__init__(init_cfg=init_cfg)\n\n        # point branch\n        if pts_backbone is not None:\n            self.pts_backbone = builder.build_backbone(pts_backbone)\n        if pts_neck is not None:\n            self.pts_neck = builder.build_neck(pts_neck)\n        if pts_bbox_heads is not None:\n            pts_bbox_head_common = pts_bbox_heads.common\n            pts_bbox_head_common.update(\n                train_cfg=train_cfg.pts if train_cfg is not None else None)\n            pts_bbox_head_common.update(test_cfg=test_cfg.pts)\n            pts_bbox_head_joint = pts_bbox_head_common.copy()\n            pts_bbox_head_joint.update(pts_bbox_heads.joint)\n            pts_bbox_head_pts 
= pts_bbox_head_common.copy()\n            pts_bbox_head_pts.update(pts_bbox_heads.pts)\n            pts_bbox_head_img = pts_bbox_head_common.copy()\n            pts_bbox_head_img.update(pts_bbox_heads.img)\n\n            self.pts_bbox_head_joint = builder.build_head(pts_bbox_head_joint)\n            self.pts_bbox_head_pts = builder.build_head(pts_bbox_head_pts)\n            self.pts_bbox_head_img = builder.build_head(pts_bbox_head_img)\n            self.pts_bbox_heads = [\n                self.pts_bbox_head_joint, self.pts_bbox_head_pts,\n                self.pts_bbox_head_img\n            ]\n            self.loss_weights = pts_bbox_heads.loss_weights\n\n        # image branch\n        if img_backbone:\n            self.img_backbone = builder.build_backbone(img_backbone)\n        if img_neck is not None:\n            self.img_neck = builder.build_neck(img_neck)\n        if img_rpn_head is not None:\n            rpn_train_cfg = train_cfg.img_rpn if train_cfg \\\n                is not None else None\n            img_rpn_head_ = img_rpn_head.copy()\n            img_rpn_head_.update(\n                train_cfg=rpn_train_cfg, test_cfg=test_cfg.img_rpn)\n            self.img_rpn_head = builder.build_head(img_rpn_head_)\n        if img_roi_head is not None:\n            rcnn_train_cfg = train_cfg.img_rcnn if train_cfg \\\n                is not None else None\n            img_roi_head.update(\n                train_cfg=rcnn_train_cfg, test_cfg=test_cfg.img_rcnn)\n            self.img_roi_head = builder.build_head(img_roi_head)\n\n        # fusion\n        if fusion_layer is not None:\n            self.fusion_layer = builder.build_fusion_layer(fusion_layer)\n            self.max_imvote_per_pixel = fusion_layer.max_imvote_per_pixel\n\n        self.freeze_img_branch = freeze_img_branch\n        if freeze_img_branch:\n            self.freeze_img_branch_params()\n\n        if img_mlp is not None:\n            self.img_mlp = MLP(**img_mlp)\n\n        self.num_sampled_seed = num_sampled_seed\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        if pretrained is None:\n            img_pretrained = None\n            pts_pretrained = None\n        elif isinstance(pretrained, dict):\n            img_pretrained = pretrained.get('img', None)\n            pts_pretrained = pretrained.get('pts', None)\n        else:\n            raise ValueError(\n                f'pretrained should be a dict, got {type(pretrained)}')\n\n        if self.with_img_backbone:\n            if img_pretrained is not None:\n                warnings.warn('DeprecationWarning: pretrained is a deprecated '\n                              'key, please consider using init_cfg.')\n                self.img_backbone.init_cfg = dict(\n                    type='Pretrained', checkpoint=img_pretrained)\n        if self.with_img_roi_head:\n            if img_pretrained is not None:\n                warnings.warn('DeprecationWarning: pretrained is a deprecated '\n                              'key, please consider using init_cfg.')\n                self.img_roi_head.init_cfg = dict(\n                    type='Pretrained', checkpoint=img_pretrained)\n\n        if self.with_pts_backbone:\n            if img_pretrained is not None:\n                warnings.warn('DeprecationWarning: pretrained is a deprecated '\n                              'key, please consider using init_cfg.')\n                self.pts_backbone.init_cfg = dict(\n                    type='Pretrained', checkpoint=pts_pretrained)\n\n    def 
freeze_img_branch_params(self):\n        \"\"\"Freeze all image branch parameters.\"\"\"\n        if self.with_img_bbox_head:\n            for param in self.img_bbox_head.parameters():\n                param.requires_grad = False\n        if self.with_img_backbone:\n            for param in self.img_backbone.parameters():\n                param.requires_grad = False\n        if self.with_img_neck:\n            for param in self.img_neck.parameters():\n                param.requires_grad = False\n        if self.with_img_rpn:\n            for param in self.img_rpn_head.parameters():\n                param.requires_grad = False\n        if self.with_img_roi_head:\n            for param in self.img_roi_head.parameters():\n                param.requires_grad = False\n\n    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\n                              missing_keys, unexpected_keys, error_msgs):\n        \"\"\"Overload in order to load img network ckpts into img branch.\"\"\"\n        module_names = ['backbone', 'neck', 'roi_head', 'rpn_head']\n        for key in list(state_dict):\n            for module_name in module_names:\n                if key.startswith(module_name) and ('img_' +\n                                                    key) not in state_dict:\n                    state_dict['img_' + key] = state_dict.pop(key)\n\n        super()._load_from_state_dict(state_dict, prefix, local_metadata,\n                                      strict, missing_keys, unexpected_keys,\n                                      error_msgs)\n\n    def train(self, mode=True):\n        \"\"\"Overload in order to keep image branch modules in eval mode.\"\"\"\n        super(ImVoteNet, self).train(mode)\n        if self.freeze_img_branch:\n            if self.with_img_bbox_head:\n                self.img_bbox_head.eval()\n            if self.with_img_backbone:\n                self.img_backbone.eval()\n            if self.with_img_neck:\n                self.img_neck.eval()\n            if self.with_img_rpn:\n                self.img_rpn_head.eval()\n            if self.with_img_roi_head:\n                self.img_roi_head.eval()\n\n    @property\n    def with_img_bbox(self):\n        \"\"\"bool: Whether the detector has a 2D image box head.\"\"\"\n        return ((hasattr(self, 'img_roi_head') and self.img_roi_head.with_bbox)\n                or (hasattr(self, 'img_bbox_head')\n                    and self.img_bbox_head is not None))\n\n    @property\n    def with_img_bbox_head(self):\n        \"\"\"bool: Whether the detector has a 2D image box head (not roi).\"\"\"\n        return hasattr(self,\n                       'img_bbox_head') and self.img_bbox_head is not None\n\n    @property\n    def with_img_backbone(self):\n        \"\"\"bool: Whether the detector has a 2D image backbone.\"\"\"\n        return hasattr(self, 'img_backbone') and self.img_backbone is not None\n\n    @property\n    def with_img_neck(self):\n        \"\"\"bool: Whether the detector has a neck in image branch.\"\"\"\n        return hasattr(self, 'img_neck') and self.img_neck is not None\n\n    @property\n    def with_img_rpn(self):\n        \"\"\"bool: Whether the detector has a 2D RPN in image detector branch.\"\"\"\n        return hasattr(self, 'img_rpn_head') and self.img_rpn_head is not None\n\n    @property\n    def with_img_roi_head(self):\n        \"\"\"bool: Whether the detector has a RoI Head in image branch.\"\"\"\n        return hasattr(self, 'img_roi_head') and self.img_roi_head is not None\n\n  
  @property\n    def with_pts_bbox(self):\n        \"\"\"bool: Whether the detector has a 3D box head.\"\"\"\n        return hasattr(self,\n                       'pts_bbox_head') and self.pts_bbox_head is not None\n\n    @property\n    def with_pts_backbone(self):\n        \"\"\"bool: Whether the detector has a 3D backbone.\"\"\"\n        return hasattr(self, 'pts_backbone') and self.pts_backbone is not None\n\n    @property\n    def with_pts_neck(self):\n        \"\"\"bool: Whether the detector has a neck in 3D detector branch.\"\"\"\n        return hasattr(self, 'pts_neck') and self.pts_neck is not None\n\n    def extract_feat(self, imgs):\n        \"\"\"Just to inherit from abstract method.\"\"\"\n        pass\n\n    def extract_img_feat(self, img):\n        \"\"\"Directly extract features from the img backbone+neck.\"\"\"\n        x = self.img_backbone(img)\n        if self.with_img_neck:\n            x = self.img_neck(x)\n        return x\n\n    def extract_img_feats(self, imgs):\n        \"\"\"Extract features from multiple images.\n\n        Args:\n            imgs (list[torch.Tensor]): A list of images. The images are\n                augmented from the same image but in different ways.\n\n        Returns:\n            list[torch.Tensor]: Features of different images\n        \"\"\"\n\n        assert isinstance(imgs, list)\n        return [self.extract_img_feat(img) for img in imgs]\n\n    def extract_pts_feat(self, pts):\n        \"\"\"Extract features of points.\"\"\"\n        x = self.pts_backbone(pts)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n\n        seed_points = x['fp_xyz'][-1]\n        seed_features = x['fp_features'][-1]\n        seed_indices = x['fp_indices'][-1]\n\n        return (seed_points, seed_features, seed_indices)\n\n    def extract_pts_feats(self, pts):\n        \"\"\"Extract features of points from multiple samples.\"\"\"\n        assert isinstance(pts, list)\n        return [self.extract_pts_feat(pt) for pt in pts]\n\n    @torch.no_grad()\n    def extract_bboxes_2d(self,\n                          img,\n                          img_metas,\n                          train=True,\n                          bboxes_2d=None,\n                          **kwargs):\n        \"\"\"Extract bounding boxes from 2d detector.\n\n        Args:\n            img (torch.Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): Image meta info.\n            train (bool): train-time or not.\n            bboxes_2d (list[torch.Tensor]): provided 2d bboxes,\n                not supported yet.\n\n        Return:\n            list[torch.Tensor]: a list of processed 2d bounding boxes.\n        \"\"\"\n        if bboxes_2d is None:\n            x = self.extract_img_feat(img)\n            proposal_list = self.img_rpn_head.simple_test_rpn(x, img_metas)\n            rets = self.img_roi_head.simple_test(\n                x, proposal_list, img_metas, rescale=False)\n\n            rets_processed = []\n            for ret in rets:\n                tmp = np.concatenate(ret, axis=0)\n                sem_class = img.new_zeros((len(tmp)))\n                start = 0\n                for i, bboxes in enumerate(ret):\n                    sem_class[start:start + len(bboxes)] = i\n                    start += len(bboxes)\n                ret = img.new_tensor(tmp)\n\n                # append class index\n                ret = torch.cat([ret, sem_class[:, None]], dim=-1)\n  
              inds = torch.argsort(ret[:, 4], descending=True)\n                ret = ret.index_select(0, inds)\n\n                # drop half bboxes during training for better generalization\n                if train:\n                    rand_drop = torch.randperm(len(ret))[:(len(ret) + 1) // 2]\n                    rand_drop = torch.sort(rand_drop)[0]\n                    ret = ret[rand_drop]\n\n                rets_processed.append(ret.float())\n            return rets_processed\n        else:\n            rets_processed = []\n            for ret in bboxes_2d:\n                if len(ret) > 0 and train:\n                    rand_drop = torch.randperm(len(ret))[:(len(ret) + 1) // 2]\n                    rand_drop = torch.sort(rand_drop)[0]\n                    ret = ret[rand_drop]\n                rets_processed.append(ret.float())\n            return rets_processed\n\n    def forward_train(self,\n                      points=None,\n                      img=None,\n                      img_metas=None,\n                      gt_bboxes=None,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      proposals=None,\n                      bboxes_2d=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      **kwargs):\n        \"\"\"Forwarding of train for image branch pretrain or stage 2 train.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img (torch.Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): list of image and point cloud meta info\n                dict. For example, keys include 'ori_shape', 'img_norm_cfg',\n                and 'transformation_3d_flow'. 
For details on the values of\n                the keys see `mmdet/datasets/pipelines/formatting.py:Collect`.\n            gt_bboxes (list[torch.Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[torch.Tensor]): class indices for each\n                2d bounding box.\n            gt_bboxes_ignore (list[torch.Tensor]): specify which\n                2d bounding boxes can be ignored when computing the loss.\n            gt_masks (torch.Tensor): true segmentation masks for each\n                2d bbox, used if the architecture supports a segmentation task.\n            proposals: override rpn proposals (2d) with custom proposals.\n                Use when `with_rpn` is False.\n            bboxes_2d (list[torch.Tensor]): provided 2d bboxes,\n                not supported yet.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): 3d gt bboxes.\n            gt_labels_3d (list[torch.Tensor]): gt class labels for 3d bboxes.\n            pts_semantic_mask (list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): point-wise instance\n                label of each batch.\n\n        Returns:\n            dict[str, torch.Tensor]: a dictionary of loss components.\n        \"\"\"\n        if points is None:\n            x = self.extract_img_feat(img)\n            losses = dict()\n\n            # RPN forward and loss\n            if self.with_img_rpn:\n                proposal_cfg = self.train_cfg.get('img_rpn_proposal',\n                                                  self.test_cfg.img_rpn)\n                rpn_losses, proposal_list = self.img_rpn_head.forward_train(\n                    x,\n                    img_metas,\n                    gt_bboxes,\n                    gt_labels=None,\n                    gt_bboxes_ignore=gt_bboxes_ignore,\n                    proposal_cfg=proposal_cfg)\n                losses.update(rpn_losses)\n            else:\n                proposal_list = proposals\n\n            roi_losses = self.img_roi_head.forward_train(\n                x, img_metas, proposal_list, gt_bboxes, gt_labels,\n                gt_bboxes_ignore, gt_masks, **kwargs)\n            losses.update(roi_losses)\n            return losses\n        else:\n            bboxes_2d = self.extract_bboxes_2d(\n                img, img_metas, bboxes_2d=bboxes_2d, **kwargs)\n\n            points = torch.stack(points)\n            seeds_3d, seed_3d_features, seed_indices = \\\n                self.extract_pts_feat(points)\n\n            img_features, masks = self.fusion_layer(img, bboxes_2d, seeds_3d,\n                                                    img_metas)\n\n            inds = sample_valid_seeds(masks, self.num_sampled_seed)\n            batch_size, img_feat_size = img_features.shape[:2]\n            pts_feat_size = seed_3d_features.shape[1]\n            inds_img = inds.view(batch_size, 1,\n                                 -1).expand(-1, img_feat_size, -1)\n            img_features = img_features.gather(-1, inds_img)\n            inds = inds % inds.shape[1]\n            inds_seed_xyz = inds.view(batch_size, -1, 1).expand(-1, -1, 3)\n            seeds_3d = seeds_3d.gather(1, inds_seed_xyz)\n            inds_seed_feats = inds.view(batch_size, 1,\n                                        -1).expand(-1, pts_feat_size, -1)\n            seed_3d_features = seed_3d_features.gather(-1, inds_seed_feats)\n            seed_indices = 
seed_indices.gather(1, inds)\n\n            img_features = self.img_mlp(img_features)\n            fused_features = torch.cat([seed_3d_features, img_features], dim=1)\n\n            feat_dict_joint = dict(\n                seed_points=seeds_3d,\n                seed_features=fused_features,\n                seed_indices=seed_indices)\n            feat_dict_pts = dict(\n                seed_points=seeds_3d,\n                seed_features=seed_3d_features,\n                seed_indices=seed_indices)\n            feat_dict_img = dict(\n                seed_points=seeds_3d,\n                seed_features=img_features,\n                seed_indices=seed_indices)\n\n            loss_inputs = (points, gt_bboxes_3d, gt_labels_3d,\n                           pts_semantic_mask, pts_instance_mask, img_metas)\n            bbox_preds_joints = self.pts_bbox_head_joint(\n                feat_dict_joint, self.train_cfg.pts.sample_mod)\n            bbox_preds_pts = self.pts_bbox_head_pts(\n                feat_dict_pts, self.train_cfg.pts.sample_mod)\n            bbox_preds_img = self.pts_bbox_head_img(\n                feat_dict_img, self.train_cfg.pts.sample_mod)\n            losses_towers = []\n            losses_joint = self.pts_bbox_head_joint.loss(\n                bbox_preds_joints,\n                *loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore)\n            losses_pts = self.pts_bbox_head_pts.loss(\n                bbox_preds_pts,\n                *loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore)\n            losses_img = self.pts_bbox_head_img.loss(\n                bbox_preds_img,\n                *loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore)\n            losses_towers.append(losses_joint)\n            losses_towers.append(losses_pts)\n            losses_towers.append(losses_img)\n            combined_losses = dict()\n            for loss_term in losses_joint:\n                if 'loss' in loss_term:\n                    combined_losses[loss_term] = 0\n                    for i in range(len(losses_towers)):\n                        combined_losses[loss_term] += \\\n                            losses_towers[i][loss_term] * \\\n                            self.loss_weights[i]\n                else:\n                    # only save the metric of the joint head\n                    # if it is not a loss\n                    combined_losses[loss_term] = \\\n                        losses_towers[0][loss_term]\n\n            return combined_losses\n\n    def forward_test(self,\n                     points=None,\n                     img_metas=None,\n                     img=None,\n                     bboxes_2d=None,\n                     **kwargs):\n        \"\"\"Forwarding of test for image branch pretrain or stage 2 train.\n\n        Args:\n            points (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and the inner\n                list contains all points in the batch, where each Tensor\n                should have a shape NxC. 
Defaults to None.\n            img_metas (list[list[dict]], optional): the outer list\n                indicates test-time augs (multiscale, flip, etc.)\n                and the inner list indicates images in a batch.\n                Defaults to None.\n            img (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and inner Tensor\n                should have a shape NxCxHxW, which contains all images\n                in the batch. Defaults to None.\n            bboxes_2d (list[list[torch.Tensor]], optional):\n                Provided 2d bboxes, not supported yet. Defaults to None.\n\n        Returns:\n            list[list[torch.Tensor]]|list[dict]: Predicted 2d or 3d boxes.\n        \"\"\"\n        if points is None:\n            for var, name in [(img, 'img'), (img_metas, 'img_metas')]:\n                if not isinstance(var, list):\n                    raise TypeError(\n                        f'{name} must be a list, but got {type(var)}')\n\n            num_augs = len(img)\n            if num_augs != len(img_metas):\n                raise ValueError(f'num of augmentations ({len(img)}) '\n                                 f'!= num of image meta ({len(img_metas)})')\n\n            if num_augs == 1:\n                # proposals (List[List[Tensor]]): the outer list indicates\n                # test-time augs (multiscale, flip, etc.) and the inner list\n                # indicates images in a batch.\n                # The Tensor should have a shape Px4, where P is the number of\n                # proposals.\n                if 'proposals' in kwargs:\n                    kwargs['proposals'] = kwargs['proposals'][0]\n                return self.simple_test_img_only(\n                    img=img[0], img_metas=img_metas[0], **kwargs)\n            else:\n                assert img[0].size(0) == 1, 'aug test does not support ' \\\n                                         'inference with batch size ' \\\n                                         f'{img[0].size(0)}'\n                # TODO: support test augmentation for predefined proposals\n                assert 'proposals' not in kwargs\n                return self.aug_test_img_only(\n                    img=img, img_metas=img_metas, **kwargs)\n\n        else:\n            for var, name in [(points, 'points'), (img_metas, 'img_metas')]:\n                if not isinstance(var, list):\n                    raise TypeError('{} must be a list, but got {}'.format(\n                        name, type(var)))\n\n            num_augs = len(points)\n            if num_augs != len(img_metas):\n                raise ValueError(\n                    'num of augmentations ({}) != num of image meta ({})'.\n                    format(len(points), len(img_metas)))\n\n            if num_augs == 1:\n                return self.simple_test(\n                    points[0],\n                    img_metas[0],\n                    img[0],\n                    bboxes_2d=bboxes_2d[0] if bboxes_2d is not None else None,\n                    **kwargs)\n            else:\n                return self.aug_test(points, img_metas, img, bboxes_2d,\n                                     **kwargs)\n\n    def simple_test_img_only(self,\n                             img,\n                             img_metas,\n                             proposals=None,\n                             rescale=False):\n        r\"\"\"Test without augmentation, image network pretrain. May refer to\n        `<https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py>`_.\n\n        Args:\n            img (torch.Tensor): Should have a shape NxCxHxW, which contains\n                all images in the batch.\n            img_metas (list[dict]): Image meta info.\n            proposals (list[Tensor], optional): override rpn proposals\n                with custom proposals. Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes to the\n                original shape of input image. Defaults to False.\n\n        Returns:\n            list[list[torch.Tensor]]: Predicted 2d boxes.\n        \"\"\"  # noqa: E501\n        assert self.with_img_bbox, 'Img bbox head must be implemented.'\n        assert self.with_img_backbone, 'Img backbone must be implemented.'\n        assert self.with_img_rpn, 'Img rpn must be implemented.'\n        assert self.with_img_roi_head, 'Img roi head must be implemented.'\n\n        x = self.extract_img_feat(img)\n\n        if proposals is None:\n            proposal_list = self.img_rpn_head.simple_test_rpn(x, img_metas)\n        else:\n            proposal_list = proposals\n\n        ret = self.img_roi_head.simple_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n        return ret\n\n    def simple_test(self,\n                    points=None,\n                    img_metas=None,\n                    img=None,\n                    bboxes_2d=None,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test without augmentation, stage 2.\n\n        Args:\n            points (list[torch.Tensor], optional): Elements in the list\n                should have a shape NxC, the list indicates all point-clouds\n                in the batch. Defaults to None.\n            img_metas (list[dict], optional): List indicates\n                images in a batch. Defaults to None.\n            img (torch.Tensor, optional): Should have a shape NxCxHxW,\n                which contains all images in the batch. Defaults to None.\n            bboxes_2d (list[torch.Tensor], optional):\n                Provided 2d bboxes, not supported yet. Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes.\n                Defaults to False.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        bboxes_2d = self.extract_bboxes_2d(\n            img, img_metas, train=False, bboxes_2d=bboxes_2d, **kwargs)\n\n        points = torch.stack(points)\n        seeds_3d, seed_3d_features, seed_indices = \\\n            self.extract_pts_feat(points)\n\n        img_features, masks = self.fusion_layer(img, bboxes_2d, seeds_3d,\n                                                img_metas)\n\n        inds = sample_valid_seeds(masks, self.num_sampled_seed)\n        batch_size, img_feat_size = img_features.shape[:2]\n        pts_feat_size = seed_3d_features.shape[1]\n        inds_img = inds.view(batch_size, 1, -1).expand(-1, img_feat_size, -1)\n        img_features = img_features.gather(-1, inds_img)\n        inds = inds % inds.shape[1]\n        inds_seed_xyz = inds.view(batch_size, -1, 1).expand(-1, -1, 3)\n        seeds_3d = seeds_3d.gather(1, inds_seed_xyz)\n        inds_seed_feats = inds.view(batch_size, 1,\n                                    -1).expand(-1, pts_feat_size, -1)\n        seed_3d_features = seed_3d_features.gather(-1, inds_seed_feats)\n        seed_indices = seed_indices.gather(1, inds)\n\n        img_features = self.img_mlp(img_features)\n\n        fused_features = torch.cat([seed_3d_features, img_features], dim=1)\n\n        feat_dict = dict(\n            seed_points=seeds_3d,\n            seed_features=fused_features,\n            seed_indices=seed_indices)\n        bbox_preds = self.pts_bbox_head_joint(feat_dict,\n                                              self.test_cfg.pts.sample_mod)\n        bbox_list = self.pts_bbox_head_joint.get_bboxes(\n            points, bbox_preds, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test_img_only(self, img, img_metas, rescale=False):\n        r\"\"\"Test function with augmentation, image network pretrain. May refer\n        to `<https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py>`_.\n\n        Args:\n            img (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and inner Tensor\n                should have a shape NxCxHxW, which contains all images\n                in the batch. Defaults to None.\n            img_metas (list[list[dict]], optional): the outer list\n                indicates test-time augs (multiscale, flip, etc.)\n                and the inner list indicates images in a batch.\n                Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes to the\n                original shape of input image. If rescale is False, then\n                returned bboxes and masks will fit the scale of imgs[0].\n                Defaults to False.\n\n        Returns:\n            list[list[torch.Tensor]]: Predicted 2d boxes.\n        \"\"\"  # noqa: E501\n        assert self.with_img_bbox, 'Img bbox head must be implemented.'\n        assert self.with_img_backbone, 'Img backbone must be implemented.'\n        assert self.with_img_rpn, 'Img rpn must be implemented.'\n        assert self.with_img_roi_head, 'Img roi head must be implemented.'\n\n        x = self.extract_img_feats(img)\n        proposal_list = self.img_rpn_head.aug_test_rpn(x, img_metas)\n\n        return self.img_roi_head.aug_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def aug_test(self,\n                 points=None,\n                 img_metas=None,\n                 imgs=None,\n                 bboxes_2d=None,\n                 rescale=False,\n                 **kwargs):\n        \"\"\"Test function with augmentation, stage 2.\n\n        Args:\n            points (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and the inner\n                list contains all points in the batch, where each Tensor\n                should have a shape NxC. Defaults to None.\n            img_metas (list[list[dict]], optional): the outer list\n                indicates test-time augs (multiscale, flip, etc.)\n                and the inner list indicates images in a batch.\n                Defaults to None.\n            imgs (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and inner Tensor\n                should have a shape NxCxHxW, which contains all images\n                in the batch. Defaults to None.\n            bboxes_2d (list[list[torch.Tensor]], optional):\n                Provided 2d bboxes, not supported yet. Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes.\n                Defaults to False.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats = self.extract_pts_feats(points_cat)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, pts_cat, img_meta, bbox_2d, img in zip(feats, points_cat,\n                                                      img_metas, bboxes_2d,\n                                                      imgs):\n\n            bbox_2d = self.extract_bboxes_2d(\n                img, img_metas, train=False, bboxes_2d=bbox_2d, **kwargs)\n\n            seeds_3d, seed_3d_features, seed_indices = x\n\n            img_features, masks = self.fusion_layer(img, bbox_2d, seeds_3d,\n                                                    img_metas)\n\n            inds = sample_valid_seeds(masks, self.num_sampled_seed)\n            batch_size, img_feat_size = img_features.shape[:2]\n            pts_feat_size = seed_3d_features.shape[1]\n            inds_img = inds.view(batch_size, 1,\n                                 -1).expand(-1, img_feat_size, -1)\n            img_features = img_features.gather(-1, inds_img)\n            inds = inds % inds.shape[1]\n            inds_seed_xyz = inds.view(batch_size, -1, 1).expand(-1, -1, 3)\n            seeds_3d = seeds_3d.gather(1, inds_seed_xyz)\n            inds_seed_feats = inds.view(batch_size, 1,\n                                        -1).expand(-1, pts_feat_size, -1)\n            seed_3d_features = seed_3d_features.gather(-1, inds_seed_feats)\n            seed_indices = seed_indices.gather(1, inds)\n\n            img_features = self.img_mlp(img_features)\n\n            fused_features = torch.cat([seed_3d_features, img_features], dim=1)\n\n            feat_dict = dict(\n                seed_points=seeds_3d,\n                seed_features=fused_features,\n                seed_indices=seed_indices)\n            bbox_preds = self.pts_bbox_head_joint(feat_dict,\n                                                  self.test_cfg.pts.sample_mod)\n            bbox_list = self.pts_bbox_head_joint.get_bboxes(\n                pts_cat, bbox_preds, img_metas, rescale=rescale)\n\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.pts_bbox_head_joint.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/detectors/imvoxelnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core import bbox3d2result, build_prior_generator\nfrom mmdet3d.models.fusion_layers.point_fusion import point_sample\nfrom mmdet.models.detectors import BaseDetector\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\n\n\n@DETECTORS.register_module()\nclass ImVoxelNet(BaseDetector):\n    r\"\"\"`ImVoxelNet <https://arxiv.org/abs/2106.01178>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 neck_3d,\n                 bbox_head,\n                 n_voxels,\n                 anchor_generator,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.backbone = build_backbone(backbone)\n        self.neck = build_neck(neck)\n        self.neck_3d = build_neck(neck_3d)\n        bbox_head.update(train_cfg=train_cfg)\n        bbox_head.update(test_cfg=test_cfg)\n        self.bbox_head = build_head(bbox_head)\n        self.n_voxels = n_voxels\n        self.anchor_generator = build_prior_generator(anchor_generator)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def extract_feat(self, img, img_metas):\n        \"\"\"Extract 3d features from the backbone -> fpn -> 3d projection.\n\n        Args:\n            img (torch.Tensor): Input images of shape (N, C_in, H, W).\n            img_metas (list): Image metas.\n\n        Returns:\n            torch.Tensor: of shape (N, C_out, N_x, N_y, N_z)\n        \"\"\"\n        x = self.backbone(img)\n        x = self.neck(x)[0]\n        points = self.anchor_generator.grid_anchors(\n            [self.n_voxels[::-1]], device=img.device)[0][:, :3]\n        volumes = []\n        for feature, img_meta in zip(x, img_metas):\n            img_scale_factor = (\n                points.new_tensor(img_meta['scale_factor'][:2])\n                if 'scale_factor' in img_meta.keys() else 1)\n            img_flip = img_meta['flip'] if 'flip' in img_meta.keys() else False\n            img_crop_offset = (\n                points.new_tensor(img_meta['img_crop_offset'])\n                if 'img_crop_offset' in img_meta.keys() else 0)\n            volume = point_sample(\n                img_meta,\n                img_features=feature[None, ...],\n                points=points,\n                proj_mat=points.new_tensor(img_meta['lidar2img']),\n                coord_type='LIDAR',\n                img_scale_factor=img_scale_factor,\n                img_crop_offset=img_crop_offset,\n                img_flip=img_flip,\n                img_pad_shape=img.shape[-2:],\n                img_shape=img_meta['img_shape'][:2],\n                aligned=False)\n            volumes.append(\n                volume.reshape(self.n_voxels[::-1] + [-1]).permute(3, 2, 1, 0))\n        x = torch.stack(volumes)\n        x = self.neck_3d(x)\n        return x\n\n    def forward_train(self, img, img_metas, gt_bboxes_3d, gt_labels_3d,\n                      **kwargs):\n        \"\"\"Forward of training.\n\n        Args:\n            img (torch.Tensor): Input images of shape (N, C_in, H, W).\n            img_metas (list): Image metas.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n\n        Returns:\n            dict[str, torch.Tensor]: A dictionary of loss components.\n  
      \"\"\"\n        x = self.extract_feat(img, img_metas)\n        x = self.bbox_head(x)\n        losses = self.bbox_head.loss(*x, gt_bboxes_3d, gt_labels_3d, img_metas)\n        return losses\n\n    def forward_test(self, img, img_metas, **kwargs):\n        \"\"\"Forward of testing.\n\n        Args:\n            img (torch.Tensor): Input images of shape (N, C_in, H, W).\n            img_metas (list): Image metas.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        # not supporting aug_test for now\n        return self.simple_test(img, img_metas)\n\n    def simple_test(self, img, img_metas):\n        \"\"\"Test without augmentations.\n\n        Args:\n            img (torch.Tensor): Input images of shape (N, C_in, H, W).\n            img_metas (list): Image metas.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        x = self.extract_feat(img, img_metas)\n        x = self.bbox_head(x)\n        bbox_list = self.bbox_head.get_bboxes(*x, img_metas)\n        bbox_results = [\n            bbox3d2result(det_bboxes, det_scores, det_labels)\n            for det_bboxes, det_scores, det_labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, imgs, img_metas, **kwargs):\n        \"\"\"Test with augmentations.\n\n        Args:\n            imgs (list[torch.Tensor]): Input images of shape (N, C_in, H, W).\n            img_metas (list): Image metas.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "mmdet3d/models/detectors/mink_single_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Adapted from https://github.com/SamsungLabs/fcaf3d/blob/master/mmdet3d/models/detectors/single_stage_sparse.py # noqa\ntry:\n    import MinkowskiEngine as ME\nexcept ImportError:\n    # Please follow getting_started.md to install MinkowskiEngine.\n    pass\n\nfrom mmdet3d.core import bbox3d2result\nfrom mmdet3d.models import DETECTORS, build_backbone, build_head\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass MinkSingleStage3DDetector(Base3DDetector):\n    r\"\"\"Single stage detector based on MinkowskiEngine `GSDN\n    <https://arxiv.org/abs/2006.12356>`_.\n\n    Args:\n        backbone (dict): Config of the backbone.\n        head (dict): Config of the head.\n        voxel_size (float): Voxel size in meters.\n        train_cfg (dict, optional): Config for train stage. Defaults to None.\n        test_cfg (dict, optional): Config for test stage. Defaults to None.\n        init_cfg (dict, optional): Config for weight initialization.\n            Defaults to None.\n        pretrained (str, optional): Deprecated initialization parameter.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 head,\n                 voxel_size,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super(MinkSingleStage3DDetector, self).__init__(init_cfg)\n        self.backbone = build_backbone(backbone)\n        head.update(train_cfg=train_cfg)\n        head.update(test_cfg=test_cfg)\n        self.head = build_head(head)\n        self.voxel_size = voxel_size\n        self.init_weights()\n\n    def extract_feat(self, points):\n        \"\"\"Extract features from points.\n\n        Args:\n            points (list[Tensor]): Raw point clouds.\n\n        Returns:\n            SparseTensor: Voxelized point clouds.\n        \"\"\"\n        coordinates, features = ME.utils.batch_sparse_collate(\n            [(p[:, :3] / self.voxel_size, p[:, 3:]) for p in points],\n            device=points[0].device)\n        x = ME.SparseTensor(coordinates=coordinates, features=features)\n        x = self.backbone(x)\n        return x\n\n    def forward_train(self, points, gt_bboxes_3d, gt_labels_3d, img_metas):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[Tensor]): Raw point clouds.\n            gt_bboxes (list[BaseInstance3DBoxes]): Ground truth\n                bboxes of each sample.\n            gt_labels(list[torch.Tensor]): Labels of each sample.\n            img_metas (list[dict]): Contains scene meta infos.\n\n        Returns:\n            dict: Centerness, bbox and classification loss values.\n        \"\"\"\n        x = self.extract_feat(points)\n        losses = self.head.forward_train(x, gt_bboxes_3d, gt_labels_3d,\n                                         img_metas)\n        return losses\n\n    def simple_test(self, points, img_metas, *args, **kwargs):\n        \"\"\"Test without augmentations.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list[dict]): Contains scene meta infos.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        x = self.extract_feat(points)\n        bbox_list = self.head.forward_test(x, img_metas)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n     
   ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, **kwargs):\n        \"\"\"Test with augmentations.\n\n        Args:\n            points (list[list[torch.Tensor]]): Points of each sample.\n            img_metas (list[dict]): Contains scene meta infos.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "mmdet3d/models/detectors/mvx_faster_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom ..builder import DETECTORS\nfrom .mvx_two_stage import MVXTwoStageDetector\n\n\n@DETECTORS.register_module()\nclass MVXFasterRCNN(MVXTwoStageDetector):\n    \"\"\"Multi-modality VoxelNet using Faster R-CNN.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(MVXFasterRCNN, self).__init__(**kwargs)\n\n\n@DETECTORS.register_module()\nclass DynamicMVXFasterRCNN(MVXTwoStageDetector):\n    \"\"\"Multi-modality VoxelNet using Faster R-CNN and dynamic voxelization.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(DynamicMVXFasterRCNN, self).__init__(**kwargs)\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points and coordinates.\n        \"\"\"\n        coors = []\n        # dynamic voxelization only provide a coors mapping\n        for res in points:\n            res_coors = self.pts_voxel_layer(res)\n            coors.append(res_coors)\n        points = torch.cat(points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return points, coors_batch\n\n    def extract_pts_feat(self, points, img_feats, img_metas):\n        \"\"\"Extract point features.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n        voxels, coors = self.voxelize(points)\n        voxel_features, feature_coors = self.pts_voxel_encoder(\n            voxels, coors, points, img_feats, img_metas)\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, feature_coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n        return x\n"
  },
  {
    "path": "mmdet3d/models/detectors/mvx_two_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom os import path as osp\n\nimport mmcv\nimport torch\nfrom mmcv.ops import Voxelization\nfrom mmcv.parallel import DataContainer as DC\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import (Box3DMode, Coord3DMode, bbox3d2result,\n                          merge_aug_bboxes_3d, show_result)\nfrom mmdet.core import multi_apply\nfrom .. import builder\nfrom ..builder import DETECTORS\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass MVXTwoStageDetector(Base3DDetector):\n    \"\"\"Base class of Multi-modality VoxelNet.\"\"\"\n\n    def __init__(self,\n                 pts_voxel_layer=None,\n                 pts_voxel_encoder=None,\n                 pts_middle_encoder=None,\n                 pts_fusion_layer=None,\n                 img_backbone=None,\n                 pts_backbone=None,\n                 img_neck=None,\n                 pts_neck=None,\n                 pts_bbox_head=None,\n                 img_roi_head=None,\n                 img_rpn_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(MVXTwoStageDetector, self).__init__(init_cfg=init_cfg)\n\n        if pts_voxel_layer:\n            self.pts_voxel_layer = Voxelization(**pts_voxel_layer)\n        if pts_voxel_encoder:\n            self.pts_voxel_encoder = builder.build_voxel_encoder(\n                pts_voxel_encoder)\n        if pts_middle_encoder:\n            self.pts_middle_encoder = builder.build_middle_encoder(\n                pts_middle_encoder)\n        if pts_backbone:\n            self.pts_backbone = builder.build_backbone(pts_backbone)\n        if pts_fusion_layer:\n            self.pts_fusion_layer = builder.build_fusion_layer(\n                pts_fusion_layer)\n        if pts_neck is not None:\n            self.pts_neck = builder.build_neck(pts_neck)\n        if pts_bbox_head:\n            pts_train_cfg = train_cfg.pts if train_cfg else None\n            pts_bbox_head.update(train_cfg=pts_train_cfg)\n            pts_test_cfg = test_cfg.pts if test_cfg else None\n            pts_bbox_head.update(test_cfg=pts_test_cfg)\n            self.pts_bbox_head = builder.build_head(pts_bbox_head)\n\n        if img_backbone:\n            self.img_backbone = builder.build_backbone(img_backbone)\n        if img_neck is not None:\n            self.img_neck = builder.build_neck(img_neck)\n        if img_rpn_head is not None:\n            self.img_rpn_head = builder.build_head(img_rpn_head)\n        if img_roi_head is not None:\n            self.img_roi_head = builder.build_head(img_roi_head)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        if pretrained is None:\n            img_pretrained = None\n            pts_pretrained = None\n        elif isinstance(pretrained, dict):\n            img_pretrained = pretrained.get('img', None)\n            pts_pretrained = pretrained.get('pts', None)\n        else:\n            raise ValueError(\n                f'pretrained should be a dict, got {type(pretrained)}')\n\n        if self.with_img_backbone:\n            if img_pretrained is not None:\n                warnings.warn('DeprecationWarning: pretrained is a deprecated '\n                              'key, please consider using init_cfg.')\n                self.img_backbone.init_cfg = dict(\n                    type='Pretrained', 
checkpoint=img_pretrained)\n        if self.with_img_roi_head:\n            if img_pretrained is not None:\n                warnings.warn('DeprecationWarning: pretrained is a deprecated '\n                              'key, please consider using init_cfg.')\n                self.img_roi_head.init_cfg = dict(\n                    type='Pretrained', checkpoint=img_pretrained)\n        if self.with_pts_backbone:\n            if pts_pretrained is not None:\n                warnings.warn('DeprecationWarning: pretrained is a deprecated '\n                              'key, please consider using init_cfg')\n                self.pts_backbone.init_cfg = dict(\n                    type='Pretrained', checkpoint=pts_pretrained)\n\n    @property\n    def with_img_shared_head(self):\n        \"\"\"bool: Whether the detector has a shared head in image branch.\"\"\"\n        return hasattr(self,\n                       'img_shared_head') and self.img_shared_head is not None\n\n    @property\n    def with_pts_bbox(self):\n        \"\"\"bool: Whether the detector has a 3D box head.\"\"\"\n        return hasattr(self,\n                       'pts_bbox_head') and self.pts_bbox_head is not None\n\n    @property\n    def with_img_bbox(self):\n        \"\"\"bool: Whether the detector has a 2D image box head.\"\"\"\n        return hasattr(self,\n                       'img_bbox_head') and self.img_bbox_head is not None\n\n    @property\n    def with_img_backbone(self):\n        \"\"\"bool: Whether the detector has a 2D image backbone.\"\"\"\n        return hasattr(self, 'img_backbone') and self.img_backbone is not None\n\n    @property\n    def with_pts_backbone(self):\n        \"\"\"bool: Whether the detector has a 3D backbone.\"\"\"\n        return hasattr(self, 'pts_backbone') and self.pts_backbone is not None\n\n    @property\n    def with_fusion(self):\n        \"\"\"bool: Whether the detector has a fusion layer.\"\"\"\n        return hasattr(self,\n                       'pts_fusion_layer') and self.fusion_layer is not None\n\n    @property\n    def with_img_neck(self):\n        \"\"\"bool: Whether the detector has a neck in image branch.\"\"\"\n        return hasattr(self, 'img_neck') and self.img_neck is not None\n\n    @property\n    def with_pts_neck(self):\n        \"\"\"bool: Whether the detector has a neck in 3D detector branch.\"\"\"\n        return hasattr(self, 'pts_neck') and self.pts_neck is not None\n\n    @property\n    def with_img_rpn(self):\n        \"\"\"bool: Whether the detector has a 2D RPN in image detector branch.\"\"\"\n        return hasattr(self, 'img_rpn_head') and self.img_rpn_head is not None\n\n    @property\n    def with_img_roi_head(self):\n        \"\"\"bool: Whether the detector has a RoI Head in image branch.\"\"\"\n        return hasattr(self, 'img_roi_head') and self.img_roi_head is not None\n\n    @property\n    def with_voxel_encoder(self):\n        \"\"\"bool: Whether the detector has a voxel encoder.\"\"\"\n        return hasattr(self,\n                       'voxel_encoder') and self.voxel_encoder is not None\n\n    @property\n    def with_middle_encoder(self):\n        \"\"\"bool: Whether the detector has a middle encoder.\"\"\"\n        return hasattr(self,\n                       'middle_encoder') and self.middle_encoder is not None\n\n    def extract_img_feat(self, img, img_metas):\n        \"\"\"Extract features of images.\"\"\"\n        if self.with_img_backbone and img is not None:\n            input_shape = img.shape[-2:]\n            # update real input 
shape of each single img\n            for img_meta in img_metas:\n                img_meta.update(input_shape=input_shape)\n\n            if img.dim() == 5 and img.size(0) == 1:\n                img.squeeze_()\n            elif img.dim() == 5 and img.size(0) > 1:\n                B, N, C, H, W = img.size()\n                img = img.view(B * N, C, H, W)\n            img_feats = self.img_backbone(img)\n        else:\n            return None\n        if self.with_img_neck:\n            img_feats = self.img_neck(img_feats)\n        return img_feats\n\n    def extract_pts_feat(self, pts, img_feats, img_metas):\n        \"\"\"Extract features of points.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n        voxels, num_points, coors = self.voxelize(pts)\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors,\n                                                img_feats, img_metas)\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n        return x\n\n    def extract_feat(self, points, img, img_metas):\n        \"\"\"Extract features from images and points.\"\"\"\n        img_feats = self.extract_img_feat(img, img_metas)\n        pts_feats = self.extract_pts_feat(points, img_feats, img_metas)\n        return (img_feats, pts_feats)\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points, number of points\n                per voxel, and coordinates.\n        \"\"\"\n        voxels, coors, num_points = [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.pts_voxel_layer(res)\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return voxels, num_points, coors_batch\n\n    def forward_train(self,\n                      points=None,\n                      img_metas=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      gt_labels=None,\n                      gt_bboxes=None,\n                      img=None,\n                      proposals=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward training function.\n\n        Args:\n            points (list[torch.Tensor], optional): Points of each sample.\n                Defaults to None.\n            img_metas (list[dict], optional): Meta information of each sample.\n                Defaults to None.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth 3D boxes. Defaults to None.\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\n                of 3D boxes. Defaults to None.\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\n                of 2D boxes in images. 
Defaults to None.\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\n                images. Defaults to None.\n            img (torch.Tensor, optional): Images of each sample with shape\n                (N, C, H, W). Defaults to None.\n            proposals (list[torch.Tensor], optional): Predicted proposals\n                used for training Fast RCNN. Defaults to None.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                2D boxes in images to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of different branches.\n        \"\"\"\n        img_feats, pts_feats = self.extract_feat(\n            points, img=img, img_metas=img_metas)\n        losses = dict()\n        if pts_feats:\n            losses_pts = self.forward_pts_train(pts_feats, gt_bboxes_3d,\n                                                gt_labels_3d, img_metas,\n                                                gt_bboxes_ignore)\n            losses.update(losses_pts)\n        if img_feats:\n            losses_img = self.forward_img_train(\n                img_feats,\n                img_metas=img_metas,\n                gt_bboxes=gt_bboxes,\n                gt_labels=gt_labels,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                proposals=proposals)\n            losses.update(losses_img)\n        return losses\n\n    def forward_pts_train(self,\n                          pts_feats,\n                          gt_bboxes_3d,\n                          gt_labels_3d,\n                          img_metas,\n                          gt_bboxes_ignore=None):\n        \"\"\"Forward function for point cloud branch.\n\n        Args:\n            pts_feats (list[torch.Tensor]): Features of point cloud branch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sample.\n            img_metas (list[dict]): Meta information of samples.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        outs = self.pts_bbox_head(pts_feats)\n        loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)\n        losses = self.pts_bbox_head.loss(\n            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def forward_img_train(self,\n                          x,\n                          img_metas,\n                          gt_bboxes,\n                          gt_labels,\n                          gt_bboxes_ignore=None,\n                          proposals=None,\n                          **kwargs):\n        \"\"\"Forward function for image branch.\n\n        This function works similarly to the forward function of Faster R-CNN.\n\n        Args:\n            x (list[torch.Tensor]): Image features of shape (B, C, H, W)\n                of multiple levels.\n            img_metas (list[dict]): Meta information of images.\n            gt_bboxes (list[torch.Tensor]): Ground truth boxes of each image\n                sample.\n            gt_labels (list[torch.Tensor]): Ground truth labels of boxes.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. Defaults to None.\n            proposals (list[torch.Tensor], optional): Proposals of each sample.\n                Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        losses = dict()\n        # RPN forward and loss\n        if self.with_img_rpn:\n            rpn_outs = self.img_rpn_head(x)\n            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_metas,\n                                          self.train_cfg.img_rpn)\n            rpn_losses = self.img_rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('img_rpn_proposal',\n                                              self.test_cfg.img_rpn)\n            proposal_inputs = rpn_outs + (img_metas, proposal_cfg)\n            proposal_list = self.img_rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        # bbox head forward and loss\n        if self.with_img_bbox:\n            img_roi_losses = self.img_roi_head.forward_train(\n                x, img_metas, proposal_list, gt_bboxes, gt_labels,\n                gt_bboxes_ignore, **kwargs)\n            losses.update(img_roi_losses)\n\n        return losses\n\n    def simple_test_img(self, x, img_metas, proposals=None, rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n        if proposals is None:\n            proposal_list = self.simple_test_rpn(x, img_metas,\n                                                 self.test_cfg.img_rpn)\n        else:\n            proposal_list = proposals\n\n        return self.img_roi_head.simple_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def simple_test_rpn(self, x, img_metas, rpn_test_cfg):\n        \"\"\"RPN test function.\"\"\"\n        rpn_outs = self.img_rpn_head(x)\n        proposal_inputs = rpn_outs + (img_metas, rpn_test_cfg)\n        proposal_list = self.img_rpn_head.get_bboxes(*proposal_inputs)\n        return proposal_list\n\n    def simple_test_pts(self, x, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch.\"\"\"\n        outs = self.pts_bbox_head(x)\n        bbox_list = self.pts_bbox_head.get_bboxes(\n            *outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def simple_test(self, points, img_metas, img=None, rescale=False):\n        \"\"\"Test function without augmentation.\"\"\"\n        img_feats, pts_feats = self.extract_feat(\n            points, img=img, img_metas=img_metas)\n\n        bbox_list = [dict() for i in range(len(img_metas))]\n        if pts_feats and self.with_pts_bbox:\n            bbox_pts = self.simple_test_pts(\n                pts_feats, img_metas, rescale=rescale)\n            for result_dict, pts_bbox in zip(bbox_list, bbox_pts):\n                result_dict['pts_bbox'] = pts_bbox\n        if img_feats and self.with_img_bbox:\n            bbox_img = self.simple_test_img(\n                img_feats, img_metas, rescale=rescale)\n            for result_dict, img_bbox in zip(bbox_list, bbox_img):\n                result_dict['img_bbox'] = img_bbox\n        return bbox_list\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentation.\"\"\"\n        img_feats, pts_feats = self.extract_feats(points, img_metas, imgs)\n\n        bbox_list = dict()\n        if pts_feats and self.with_pts_bbox:\n            bbox_pts = self.aug_test_pts(pts_feats, img_metas, rescale)\n            bbox_list.update(pts_bbox=bbox_pts)\n        return [bbox_list]\n\n    def extract_feats(self, points, img_metas, imgs=None):\n        \"\"\"Extract point and image features of multiple samples.\"\"\"\n        if imgs is None:\n            imgs = [None] * len(img_metas)\n        img_feats, pts_feats = multi_apply(self.extract_feat, points, imgs,\n                                           img_metas)\n        return img_feats, pts_feats\n\n    def aug_test_pts(self, feats, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch with augmentation.\"\"\"\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.pts_bbox_head(x)\n            bbox_list = self.pts_bbox_head.get_bboxes(\n                *outs, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.pts_bbox_head.test_cfg)\n        return merged_bboxes\n\n    def show_results(self, data, result, out_dir):\n        \"\"\"Results visualization.\n\n        Args:\n            data (dict): Input points and the information of the sample.\n            result (dict): Prediction results.\n            out_dir (str): Output directory of visualization result.\n        \"\"\"\n        for batch_id in range(len(result)):\n            if isinstance(data['points'][0], DC):\n                points = data['points'][0]._data[0][batch_id].numpy()\n            elif mmcv.is_list_of(data['points'][0], torch.Tensor):\n                points = data['points'][0][batch_id]\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['points'][0])} \"\n                    f'for visualization!')\n            if isinstance(data['img_metas'][0], DC):\n                pts_filename = data['img_metas'][0]._data[0][batch_id][\n                    'pts_filename']\n                box_mode_3d = data['img_metas'][0]._data[0][batch_id][\n                    'box_mode_3d']\n            elif mmcv.is_list_of(data['img_metas'][0], dict):\n                pts_filename = data['img_metas'][0][batch_id]['pts_filename']\n                box_mode_3d = data['img_metas'][0][batch_id]['box_mode_3d']\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['img_metas'][0])} \"\n                    f'for visualization!')\n            file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n            assert out_dir is not None, 'Expect out_dir, got none.'\n            inds = result[batch_id]['pts_bbox']['scores_3d'] > 0.1\n            pred_bboxes = result[batch_id]['pts_bbox']['boxes_3d'][inds]\n\n            # for now we convert points and bbox into depth mode\n            if (box_mode_3d == Box3DMode.CAM) or (box_mode_3d\n                                                  == Box3DMode.LIDAR):\n                points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                                   Coord3DMode.DEPTH)\n                pred_bboxes = Box3DMode.convert(pred_bboxes, box_mode_3d,\n                                                Box3DMode.DEPTH)\n            elif box_mode_3d != Box3DMode.DEPTH:\n                raise ValueError(\n                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')\n\n            pred_bboxes = pred_bboxes.tensor.cpu().numpy()\n            show_result(points, None, pred_bboxes, out_dir, file_name)\n"
  },
  {
    "path": "mmdet3d/models/detectors/parta2.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import Voxelization\nfrom torch.nn import functional as F\n\nfrom .. import builder\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStage3DDetector\n\n\n@DETECTORS.register_module()\nclass PartA2(TwoStage3DDetector):\n    r\"\"\"Part-A2 detector.\n\n    Please refer to the `paper <https://arxiv.org/abs/1907.03670>`_\n    \"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(PartA2, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n        self.voxel_layer = Voxelization(**voxel_layer)\n        self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)\n        self.middle_encoder = builder.build_middle_encoder(middle_encoder)\n\n    def extract_feat(self, points, img_metas):\n        \"\"\"Extract features from points.\"\"\"\n        voxel_dict = self.voxelize(points)\n        voxel_features = self.voxel_encoder(voxel_dict['voxels'],\n                                            voxel_dict['num_points'],\n                                            voxel_dict['coors'])\n        batch_size = voxel_dict['coors'][-1, 0].item() + 1\n        feats_dict = self.middle_encoder(voxel_features, voxel_dict['coors'],\n                                         batch_size)\n        x = self.backbone(feats_dict['spatial_features'])\n        if self.with_neck:\n            neck_feats = self.neck(x)\n            feats_dict.update({'neck_feats': neck_feats})\n        return feats_dict, voxel_dict\n\n    @torch.no_grad()\n    def voxelize(self, points):\n        \"\"\"Apply hard voxelization to points.\"\"\"\n        voxels, coors, num_points, voxel_centers = [], [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.voxel_layer(res)\n            res_voxel_centers = (\n                res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(\n                    self.voxel_layer.voxel_size) + res_voxels.new_tensor(\n                        self.voxel_layer.point_cloud_range[0:3])\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n            voxel_centers.append(res_voxel_centers)\n\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        voxel_centers = torch.cat(voxel_centers, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n\n        voxel_dict = dict(\n            voxels=voxels,\n            num_points=num_points,\n            coors=coors_batch,\n            voxel_centers=voxel_centers)\n        return voxel_dict\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n             
          gt_bboxes_ignore=None,\n                      proposals=None):\n        \"\"\"Training forward function.\n\n        Args:\n            points (list[torch.Tensor]): Point cloud of each sample.\n            img_metas (list[dict]): Meta information of each sample.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sample.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        feats_dict, voxels_dict = self.extract_feat(points, img_metas)\n\n        losses = dict()\n\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(feats_dict['neck_feats'])\n            rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d,\n                                          img_metas)\n            rpn_losses = self.rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            proposal_inputs = rpn_outs + (img_metas, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        roi_losses = self.roi_head.forward_train(feats_dict, voxels_dict,\n                                                 img_metas, proposal_list,\n                                                 gt_bboxes_3d, gt_labels_3d)\n\n        losses.update(roi_losses)\n\n        return losses\n\n    def simple_test(self, points, img_metas, proposals=None, rescale=False):\n        \"\"\"Test function without augmentation.\"\"\"\n        feats_dict, voxels_dict = self.extract_feat(points, img_metas)\n\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(feats_dict['neck_feats'])\n            proposal_cfg = self.test_cfg.rpn\n            bbox_inputs = rpn_outs + (img_metas, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*bbox_inputs)\n        else:\n            proposal_list = proposals\n\n        return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,\n                                         proposal_list)\n"
  },
  {
    "path": "mmdet3d/models/detectors/point_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStage3DDetector\n\n\n@DETECTORS.register_module()\nclass PointRCNN(TwoStage3DDetector):\n    r\"\"\"PointRCNN detector.\n\n    Please refer to the `PointRCNN <https://arxiv.org/abs/1812.04244>`_\n\n    Args:\n        backbone (dict): Config dict of detector's backbone.\n        neck (dict, optional): Config dict of neck. Defaults to None.\n        rpn_head (dict, optional): Config of RPN head. Defaults to None.\n        roi_head (dict, optional): Config of ROI head. Defaults to None.\n        train_cfg (dict, optional): Train configs. Defaults to None.\n        test_cfg (dict, optional): Test configs. Defaults to None.\n        pretrained (str, optional): Model pretrained path. Defaults to None.\n        init_cfg (dict, optional): Config of initialization. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(PointRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n\n    def extract_feat(self, points):\n        \"\"\"Directly extract features from the backbone+neck.\n\n        Args:\n            points (torch.Tensor): Input points.\n\n        Returns:\n            dict: Features from the backbone+neck\n        \"\"\"\n        x = self.backbone(points)\n\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_train(self, points, img_metas, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img_metas (list[dict]): Meta information of each sample.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n\n        Returns:\n            dict: Losses.\n        \"\"\"\n        losses = dict()\n        points_cat = torch.stack(points)\n        x = self.extract_feat(points_cat)\n\n        # features for rcnn\n        backbone_feats = x['fp_features'].clone()\n        backbone_xyz = x['fp_xyz'].clone()\n        rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}\n\n        bbox_preds, cls_preds = self.rpn_head(x)\n\n        rpn_loss = self.rpn_head.loss(\n            bbox_preds=bbox_preds,\n            cls_preds=cls_preds,\n            points=points,\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            img_metas=img_metas)\n        losses.update(rpn_loss)\n\n        bbox_list = self.rpn_head.get_bboxes(points_cat, bbox_preds, cls_preds,\n                                             img_metas)\n        proposal_list = [\n            dict(\n                boxes_3d=bboxes,\n                scores_3d=scores,\n                labels_3d=labels,\n                cls_preds=preds_cls)\n            for bboxes, scores, labels, preds_cls in bbox_list\n        ]\n        rcnn_feats.update({'points_cls_preds': cls_preds})\n\n        roi_losses = self.roi_head.forward_train(rcnn_feats, 
img_metas,\n                                                 proposal_list, gt_bboxes_3d,\n                                                 gt_labels_3d)\n        losses.update(roi_losses)\n\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Forward of testing.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list[dict]): Image metas.\n            imgs (list[torch.Tensor], optional): Images of each sample.\n                Defaults to None.\n            rescale (bool, optional): Whether to rescale results.\n                Defaults to False.\n\n        Returns:\n            list: Predicted 3d boxes.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        # features for rcnn\n        backbone_feats = x['fp_features'].clone()\n        backbone_xyz = x['fp_xyz'].clone()\n        rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}\n        bbox_preds, cls_preds = self.rpn_head(x)\n        rcnn_feats.update({'points_cls_preds': cls_preds})\n\n        bbox_list = self.rpn_head.get_bboxes(\n            points_cat, bbox_preds, cls_preds, img_metas, rescale=rescale)\n\n        proposal_list = [\n            dict(\n                boxes_3d=bboxes,\n                scores_3d=scores,\n                labels_3d=labels,\n                cls_preds=preds_cls)\n            for bboxes, scores, labels, preds_cls in bbox_list\n        ]\n        bbox_results = self.roi_head.simple_test(rcnn_feats, img_metas,\n                                                 proposal_list)\n\n        return bbox_results\n"
  },
  {
    "path": "mmdet3d/models/detectors/sassd.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import Voxelization\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet.models.builder import DETECTORS\nfrom .. import builder\nfrom .single_stage import SingleStage3DDetector\n\n\n@DETECTORS.register_module()\nclass SASSD(SingleStage3DDetector):\n    r\"\"\"`SASSD <https://github.com/skyhehe123/SA-SSD>` _ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super(SASSD, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg,\n            pretrained=pretrained)\n\n        self.voxel_layer = Voxelization(**voxel_layer)\n        self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)\n        self.middle_encoder = builder.build_middle_encoder(middle_encoder)\n\n    def extract_feat(self, points, img_metas=None, test_mode=False):\n        \"\"\"Extract features from points.\"\"\"\n        voxels, num_points, coors = self.voxelize(points)\n        voxel_features = self.voxel_encoder(voxels, num_points, coors)\n        batch_size = coors[-1, 0].item() + 1\n        x, point_misc = self.middle_encoder(voxel_features, coors, batch_size,\n                                            test_mode)\n        x = self.backbone(x)\n        if self.with_neck:\n            x = self.neck(x)\n        return x, point_misc\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply hard voxelization to points.\"\"\"\n        voxels, coors, num_points = [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.voxel_layer(res)\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return voxels, num_points, coors_batch\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      gt_bboxes_ignore=None):\n        \"\"\"Training forward function.\n\n        Args:\n            points (list[torch.Tensor]): Point cloud of each sample.\n            img_metas (list[dict]): Meta information of each sample\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sampole\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n\n        x, point_misc = self.extract_feat(points, img_metas, test_mode=False)\n        aux_loss = self.middle_encoder.aux_loss(*point_misc, gt_bboxes_3d)\n\n        outs = self.bbox_head(x)\n        loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)\n        losses = self.bbox_head.loss(\n            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        losses.update(aux_loss)\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function without augmentaiton.\"\"\"\n        x, _ = self.extract_feat(points, img_metas, test_mode=True)\n        outs = self.bbox_head(x)\n        bbox_list = self.bbox_head.get_bboxes(\n            *outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentaiton.\"\"\"\n        feats = self.extract_feats(points, img_metas, test_mode=True)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.bbox_head(x)\n            bbox_list = self.bbox_head.get_bboxes(\n                *outs, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/detectors/single_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass SingleStage3DDetector(Base3DDetector):\n    \"\"\"SingleStage3DDetector.\n\n    This class serves as a base class for single-stage 3D detectors.\n\n    Args:\n        backbone (dict): Config dict of detector's backbone.\n        neck (dict, optional): Config dict of neck. Defaults to None.\n        bbox_head (dict, optional): Config dict of box head. Defaults to None.\n        train_cfg (dict, optional): Config dict of training hyper-parameters.\n            Defaults to None.\n        test_cfg (dict, optional): Config dict of test hyper-parameters.\n            Defaults to None.\n        pretrained (str, optional): Path of pretrained models.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super(SingleStage3DDetector, self).__init__(init_cfg)\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n        bbox_head.update(train_cfg=train_cfg)\n        bbox_head.update(test_cfg=test_cfg)\n        self.bbox_head = build_head(bbox_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def forward_dummy(self, points):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        x = self.extract_feat(points)\n        try:\n            sample_mod = self.train_cfg.sample_mod\n            outs = self.bbox_head(x, sample_mod)\n        except AttributeError:\n            outs = self.bbox_head(x)\n        return outs\n\n    def extract_feat(self, points, img_metas=None):\n        \"\"\"Directly extract features from the backbone+neck.\n\n        Args:\n            points (torch.Tensor): Input points.\n        \"\"\"\n        x = self.backbone(points)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def extract_feats(self, points, img_metas):\n        \"\"\"Extract features of multiple samples.\"\"\"\n        return [\n            self.extract_feat(pts, img_meta)\n            for pts, img_meta in zip(points, img_metas)\n        ]\n"
  },
  {
    "path": "mmdet3d/models/detectors/single_stage_mono3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.parallel import DataContainer as DC\n\nfrom mmdet3d.core import (CameraInstance3DBoxes, bbox3d2result,\n                          show_multi_modality_result)\nfrom mmdet.models.detectors import SingleStageDetector\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\n\n\n@DETECTORS.register_module()\nclass SingleStageMono3DDetector(SingleStageDetector):\n    \"\"\"Base class for monocular 3D single-stage detectors.\n\n    Single-stage detectors directly and densely predict bounding boxes on the\n    output features of the backbone+neck.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(SingleStageDetector, self).__init__(init_cfg)\n        if pretrained:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            backbone.pretrained = pretrained\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n        bbox_head.update(train_cfg=train_cfg)\n        bbox_head.update(test_cfg=test_cfg)\n        self.bbox_head = build_head(bbox_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def extract_feats(self, imgs):\n        \"\"\"Directly extract features from the backbone+neck.\"\"\"\n        assert isinstance(imgs, list)\n        return [self.extract_feat(img) for img in imgs]\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      centers2d,\n                      depths,\n                      attr_labels=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (N, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes_3d (list[Tensor]): Each item are the 3D truth boxes for\n                each image in [x, y, z, x_size, y_size, z_size, yaw, vx, vy]\n                format.\n            gt_labels_3d (list[Tensor]): 3D class indices corresponding to\n                each box.\n            centers2d (list[Tensor]): Projected 3D centers onto 2D images.\n            depths (list[Tensor]): Depth of projected centers on 2D images.\n            attr_labels (list[Tensor], optional): Attribute indices\n                corresponding to each box\n            gt_bboxes_ignore (list[Tensor]): Specify which bounding\n      
          boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        x = self.extract_feat(img)\n        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,\n                                              gt_labels, gt_bboxes_3d,\n                                              gt_labels_3d, centers2d, depths,\n                                              attr_labels, gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, img, img_metas, rescale=False):\n        \"\"\"Test function without test time augmentation.\n\n        Args:\n            imgs (list[torch.Tensor]): List of multiple images\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[list[np.ndarray]]: BBox results of each image and classes.\n                The outer list corresponds to each image. The inner list\n                corresponds to each class.\n        \"\"\"\n        x = self.extract_feat(img)\n        outs = self.bbox_head(x)\n        bbox_outputs = self.bbox_head.get_bboxes(\n            *outs, img_metas, rescale=rescale)\n\n        if self.bbox_head.pred_bbox2d:\n            from mmdet.core import bbox2result\n            bbox2d_img = [\n                bbox2result(bboxes2d, labels, self.bbox_head.num_classes)\n                for bboxes, scores, labels, attrs, bboxes2d in bbox_outputs\n            ]\n            bbox_outputs = [bbox_outputs[0][:-1]]\n\n        bbox_img = [\n            bbox3d2result(bboxes, scores, labels, attrs)\n            for bboxes, scores, labels, attrs in bbox_outputs\n        ]\n\n        bbox_list = [dict() for i in range(len(img_metas))]\n        for result_dict, img_bbox in zip(bbox_list, bbox_img):\n            result_dict['img_bbox'] = img_bbox\n        if self.bbox_head.pred_bbox2d:\n            for result_dict, img_bbox2d in zip(bbox_list, bbox2d_img):\n                result_dict['img_bbox2d'] = img_bbox2d\n        return bbox_list\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test function with test time augmentation.\"\"\"\n        feats = self.extract_feats(imgs)\n\n        # only support aug_test for one sample\n        outs_list = [self.bbox_head(x) for x in feats]\n        for i, img_meta in enumerate(img_metas):\n            if img_meta[0]['pcd_horizontal_flip']:\n                for j in range(len(outs_list[i])):  # for each prediction\n                    if outs_list[i][j][0] is None:\n                        continue\n                    for k in range(len(outs_list[i][j])):\n                        # every stride of featmap\n                        outs_list[i][j][k] = torch.flip(\n                            outs_list[i][j][k], dims=[3])\n                reg = outs_list[i][1]\n                for reg_feat in reg:\n                    # offset_x\n                    reg_feat[:, 0, :, :] = 1 - reg_feat[:, 0, :, :]\n                    # velo_x\n                    if self.bbox_head.pred_velo:\n                        reg_feat[:, 7, :, :] = -reg_feat[:, 7, :, :]\n                    # rotation\n                    reg_feat[:, 6, :, :] = -reg_feat[:, 6, :, :] + np.pi\n\n        merged_outs = []\n        for i in range(len(outs_list[0])):  # for each prediction\n            merged_feats = []\n            for j in range(len(outs_list[0][i])):\n                if 
outs_list[0][i][0] is None:\n                    merged_feats.append(None)\n                    continue\n                # for each stride of featmap\n                avg_feats = torch.mean(\n                    torch.cat([x[i][j] for x in outs_list]),\n                    dim=0,\n                    keepdim=True)\n                if i == 1:  # regression predictions\n                    # rot/velo/2d det keeps the original\n                    avg_feats[:, 6:, :, :] = \\\n                        outs_list[0][i][j][:, 6:, :, :]\n                if i == 2:\n                    # dir_cls keeps the original\n                    avg_feats = outs_list[0][i][j]\n                merged_feats.append(avg_feats)\n            merged_outs.append(merged_feats)\n        merged_outs = tuple(merged_outs)\n\n        bbox_outputs = self.bbox_head.get_bboxes(\n            *merged_outs, img_metas[0], rescale=rescale)\n        if self.bbox_head.pred_bbox2d:\n            from mmdet.core import bbox2result\n            bbox2d_img = [\n                bbox2result(bboxes2d, labels, self.bbox_head.num_classes)\n                for bboxes, scores, labels, attrs, bboxes2d in bbox_outputs\n            ]\n            bbox_outputs = [bbox_outputs[0][:-1]]\n\n        bbox_img = [\n            bbox3d2result(bboxes, scores, labels, attrs)\n            for bboxes, scores, labels, attrs in bbox_outputs\n        ]\n\n        bbox_list = dict()\n        bbox_list.update(img_bbox=bbox_img[0])\n        if self.bbox_head.pred_bbox2d:\n            bbox_list.update(img_bbox2d=bbox2d_img[0])\n\n        return [bbox_list]\n\n    def show_results(self, data, result, out_dir, show=False, score_thr=None):\n        \"\"\"Results visualization.\n\n        Args:\n            data (list[dict]): Input images and the information of the sample.\n            result (list[dict]): Prediction results.\n            out_dir (str): Output directory of visualization result.\n            show (bool, optional): Determines whether you are\n                going to show result by open3d.\n                Defaults to False.\n            TODO: implement score_thr of single_stage_mono3d.\n            score_thr (float, optional): Score threshold of bounding boxes.\n                Default to None.\n                Not implemented yet, but it is here for unification.\n        \"\"\"\n        for batch_id in range(len(result)):\n            if isinstance(data['img_metas'][0], DC):\n                img_filename = data['img_metas'][0]._data[0][batch_id][\n                    'filename']\n                cam2img = data['img_metas'][0]._data[0][batch_id]['cam2img']\n            elif mmcv.is_list_of(data['img_metas'][0], dict):\n                img_filename = data['img_metas'][0][batch_id]['filename']\n                cam2img = data['img_metas'][0][batch_id]['cam2img']\n            else:\n                ValueError(\n                    f\"Unsupported data type {type(data['img_metas'][0])} \"\n                    f'for visualization!')\n            img = mmcv.imread(img_filename)\n            file_name = osp.split(img_filename)[-1].split('.')[0]\n\n            assert out_dir is not None, 'Expect out_dir, got none.'\n\n            pred_bboxes = result[batch_id]['img_bbox']['boxes_3d']\n            assert isinstance(pred_bboxes, CameraInstance3DBoxes), \\\n                f'unsupported predicted bbox type {type(pred_bboxes)}'\n\n            show_multi_modality_result(\n                img,\n                None,\n                pred_bboxes,\n                
cam2img,\n                out_dir,\n                file_name,\n                'camera',\n                show=show)\n"
  },
  {
    "path": "mmdet3d/models/detectors/smoke_mono3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage_mono3d import SingleStageMono3DDetector\n\n\n@DETECTORS.register_module()\nclass SMOKEMono3D(SingleStageMono3DDetector):\n    r\"\"\"SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object\n        detection.\n\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(SMOKEMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                          test_cfg, pretrained)\n"
  },
  {
    "path": "mmdet3d/models/detectors/ssd3dnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .votenet import VoteNet\n\n\n@DETECTORS.register_module()\nclass SSD3DNet(VoteNet):\n    \"\"\"3DSSDNet model.\n\n    https://arxiv.org/abs/2002.10187.pdf\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super(SSD3DNet, self).__init__(\n            backbone=backbone,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg,\n            pretrained=pretrained)\n"
  },
  {
    "path": "mmdet3d/models/detectors/two_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom mmdet.models import TwoStageDetector\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass TwoStage3DDetector(Base3DDetector, TwoStageDetector):\n    \"\"\"Base class of two-stage 3D detector.\n\n    It inherits original ``:class:TwoStageDetector`` and\n    ``:class:Base3DDetector``. This class could serve as a base class for all\n    two-stage 3D detectors.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(TwoStageDetector, self).__init__(init_cfg)\n        if pretrained:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            backbone.pretrained = pretrained\n        self.backbone = build_backbone(backbone)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        if neck is not None:\n            self.neck = build_neck(neck)\n\n        if rpn_head is not None:\n            rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None\n            rpn_head_ = rpn_head.copy()\n            rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)\n            self.rpn_head = build_head(rpn_head_)\n\n        if roi_head is not None:\n            # update train and test cfg here for now\n            # TODO: refactor assigner & sampler\n            rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None\n            roi_head.update(train_cfg=rcnn_train_cfg)\n            roi_head.update(test_cfg=test_cfg.rcnn)\n            roi_head.pretrained = pretrained\n            self.roi_head = build_head(roi_head)\n"
  },
  {
    "path": "mmdet3d/models/detectors/votenet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStage3DDetector\n\n\n@DETECTORS.register_module()\nclass VoteNet(SingleStage3DDetector):\n    r\"\"\"`VoteNet <https://arxiv.org/pdf/1904.09664.pdf>`_ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super(VoteNet, self).__init__(\n            backbone=backbone,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=None,\n            pretrained=pretrained)\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img_metas (list): Image metas.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): point-wise instance\n                label of each batch.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)\n        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,\n                       pts_instance_mask, img_metas)\n        losses = self.bbox_head.loss(\n            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Forward of testing.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list): Image metas.\n            rescale (bool): Whether to rescale results.\n\n        Returns:\n            list: Predicted 3d boxes.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)\n        bbox_list = self.bbox_head.get_bboxes(\n            points_cat, bbox_preds, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test with augmentation.\"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats = self.extract_feats(points_cat, img_metas)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, pts_cat, img_meta in zip(feats, points_cat, img_metas):\n            bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)\n            bbox_list 
= self.bbox_head.get_bboxes(\n                pts_cat, bbox_preds, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/detectors/voxelnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import Voxelization\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom .. import builder\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStage3DDetector\n\n\n@DETECTORS.register_module()\nclass VoxelNet(SingleStage3DDetector):\n    r\"\"\"`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super(VoxelNet, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg,\n            pretrained=pretrained)\n        self.voxel_layer = Voxelization(**voxel_layer)\n        self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)\n        self.middle_encoder = builder.build_middle_encoder(middle_encoder)\n\n    def extract_feat(self, points, img_metas=None):\n        \"\"\"Extract features from points.\"\"\"\n\n        voxels, num_points, coors = self.voxelize(points)\n        voxel_features = self.voxel_encoder(voxels, num_points, coors)\n        batch_size = coors[-1, 0].item() + 1\n        x = self.middle_encoder(voxel_features, coors, batch_size)\n        x = self.backbone(x)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply hard voxelization to points.\"\"\"\n        voxels, coors, num_points = [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.voxel_layer(res)\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return voxels, num_points, coors_batch\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      gt_bboxes_ignore=None):\n        \"\"\"Training forward function.\n\n        Args:\n            points (list[torch.Tensor]): Point cloud of each sample.\n            img_metas (list[dict]): Meta information of each sample\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sampole\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        x = self.extract_feat(points, img_metas)\n        outs = self.bbox_head(x)\n        loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)\n        losses = self.bbox_head.loss(\n            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function without augmentaiton.\"\"\"\n        x = self.extract_feat(points, img_metas)\n        outs = self.bbox_head(x)\n        bbox_list = self.bbox_head.get_bboxes(\n            *outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentaiton.\"\"\"\n        feats = self.extract_feats(points, img_metas)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.bbox_head(x)\n            bbox_list = self.bbox_head.get_bboxes(\n                *outs, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/fbbev/__init__.py",
    "content": "from .detectors import *\nfrom .modules import *\nfrom .utils import *\nfrom .view_transformation import *\nfrom .heads import *\nfrom .streampetr import *\nfrom .track_head import *\nfrom .streammapnet import *\nfrom .motion_head import *\nfrom .planner_head import *"
  },
  {
    "path": "mmdet3d/models/fbbev/detectors/__init__.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\nfrom .bev_planner import BEVPlanner"
  },
  {
    "path": "mmdet3d/models/fbbev/detectors/bev_planner.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\nimport torch\nimport torch.nn.functional as F\nimport torch.nn as nn\nfrom mmcv.runner import force_fp32\nimport os\nfrom mmdet.models import DETECTORS\nfrom mmdet3d.models import builder\nfrom mmdet3d.models.detectors import CenterPoint\nfrom mmdet3d.models.builder import build_head, build_neck\nimport numpy as np\nimport torch\nimport torchvision\nimport matplotlib\nimport cv2\nimport mmcv\nfrom ..utils.grid_mask import GridMask\nfrom ..utils.bricks import save_tensor\n\ndef generate_forward_transformation_matrix(bda, img_meta_dict=None):\n    b = bda.size(0)\n    hom_res = torch.eye(4)[None].repeat(b, 1, 1).to(bda.device)\n    for i in range(b):\n        hom_res[i, :3, :3] = bda[i]\n    return hom_res\n\n\n@DETECTORS.register_module()\nclass BEVPlanner(CenterPoint):\n\n    def __init__(self, \n                 # BEVDet components\n                 img_bev_encoder_backbone=None,\n                 img_bev_encoder_neck=None,\n                 forward_projection=None,\n                 # BEVFormer components\n                 backward_projection=None,\n                 # FB-BEV components\n                 frpn=None,\n                 # other modules\n                 depth_net=None,\n                 occupancy_head=None,\n                 img_det_2d_head=None,\n                 map_head=None,\n                 motion_head=None,\n                 planner_head=None,\n                 # other settings.\n                 use_depth_supervision=False,\n                 add_forward_backbward_feats=False,\n                 fix_void=False,\n                 occupancy_save_path=None,\n                 do_history=True,\n                 interpolation_mode='bilinear',\n                 fuse_history_bev=True,\n                 history_cat_num=16,\n                 history_cat_conv_out_channels=None,\n                 embed_dims=80,\n                 single_bev_num_channels=80,\n                 use_grid_mask=False,\n                 yolox_use_ml_feats=False,\n                 with_ego_status=False,\n                 align_prev_bev=True,\n                  **kwargs):\n        \"\"\"\n            Parameters:\n                img_bev_encoder_backbone - \n                img_bev_encoder_neck - \n                forward_projection - \n                backward_projection -\n                frpn - foreground region proposal network, used in FB-BEV\n                depth_net -\n                occupancy_head -\n                img_det_2d_head -\n                map_head -\n                motion_head -\n                planner_head -\n                \n                use_depth_supervision -\n                add_forward_backbward_feats -\n                fix_void - Used to fix legacy bugs in Occupancy\n                occupancy_save_path -\n                do_history - A Flag to start the temporal traning at i-th epoch\n                interpolation_mode -\n                fuse_history_bev - Weather to use history bev, which is different from `do_hisitory` \n                history_cat_num -\n                history_cat_conv_out_channels -\n                single_bev_num_channels -\n                use_grid_mask -\n                yolox_use_ml_feats -\n                with_ego_status -\n        \"\"\"\n        super(BEVPlanner, self).__init__(**kwargs)\n        
self.fix_void = fix_void\n      \n        # BEVDet init\n        self.forward_projection = builder.build_neck(forward_projection) if forward_projection else None\n        self.img_bev_encoder_backbone = builder.build_backbone(img_bev_encoder_backbone) if img_bev_encoder_backbone else None\n        self.img_bev_encoder_neck = builder.build_neck(img_bev_encoder_neck) if img_bev_encoder_neck else None\n\n        # BEVFormer init\n        self.backward_projection = builder.build_head(backward_projection) if backward_projection else None\n    \n        # FB-BEV init\n        if not self.forward_projection: assert not frpn, 'frpn relies on LSS'\n        self.frpn = builder.build_head(frpn) if frpn else None\n\n        # Depth Net\n        self.depth_net = builder.build_head(depth_net) if depth_net else None\n\n        # Occupancy Head\n        self.occupancy_head = builder.build_head(occupancy_head) if occupancy_head else None\n\n        # 2D det head\n        self.img_det_2d_head = builder.build_head(img_det_2d_head) if img_det_2d_head else None\n\n        # map head\n        if map_head:\n            map_head['train_cfg'] = kwargs.get('train_cfg', None)\n            self.map_head = builder.build_head(map_head)\n        else: \n            self.map_head = None\n\n        # motion\n        self.motion_head = builder.build_head(motion_head) if motion_head else None        \n\n        # planner\n        self.planner_head = builder.build_head(planner_head) if planner_head else None\n        \n        self.embed_dims = embed_dims\n\n        self.use_grid_mask = use_grid_mask\n        if self.use_grid_mask:\n            self.grid_mask = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)\n        \n        self.add_forward_backbward_feats = add_forward_backbward_feats # fuse voxel features and bev features\n        self.use_depth_supervision = use_depth_supervision\n        self.yolox_use_ml_feats = yolox_use_ml_feats\n        self.occupancy_save_path = occupancy_save_path # for saving data\\for submitting to test server\n\n\n        self.with_ego_status = with_ego_status\n        if self.with_ego_status:\n            self.can_bus_mlp = nn.Sequential(\n                nn.Linear(9, self.embed_dims // 2),\n                nn.ReLU(inplace=True),\n                nn.Linear(self.embed_dims // 2, self.embed_dims),\n                nn.ReLU(inplace=True),\n                nn.LayerNorm(self.embed_dims)\n            )\n        # Deal with history\n        self.single_bev_num_channels = single_bev_num_channels\n        self.do_history = do_history\n        self.interpolation_mode = interpolation_mode\n        self.history_cat_num = history_cat_num\n        self.history_cam_sweep_freq = 0.5 # seconds between each frame\n        self.history_cat_conv_out_channels = history_cat_conv_out_channels\n        self.align_prev_bev=align_prev_bev\n        self.fuse_history_bev = fuse_history_bev\n        if self.fuse_history_bev:\n            self._init_fuse_layers()\n        self.history_sweep_time = None\n        self.history_bev = None\n        self.history_bev_before_encoder = None\n        self.history_seq_ids = None\n        self.history_forward_augs = None\n\n    def _init_fuse_layers(self):\n        history_cat_conv_out_channels = (self.history_cat_conv_out_channels \n                                         if self.history_cat_conv_out_channels is not None \n                                         else self.single_bev_num_channels)\n        ## Embed each sample with its relative temporal 
offset with current timestep\n    \n        conv = nn.Conv2d if self.forward_projection.nx[-1] == 1 else nn.Conv3d\n        self.history_keyframe_time_conv = nn.Sequential(\n             conv(self.single_bev_num_channels + 1,\n                     self.single_bev_num_channels,\n                     kernel_size=1,\n                     padding=0,\n                     stride=1),\n             nn.SyncBatchNorm(self.single_bev_num_channels),\n             nn.ReLU(inplace=True))\n        ## Then concatenate and send them through an MLP.\n        self.history_keyframe_cat_conv = nn.Sequential(\n            conv(self.single_bev_num_channels * (self.history_cat_num + 1),\n                    history_cat_conv_out_channels,\n                    kernel_size=1,\n                    padding=0,\n                    stride=1),\n            nn.SyncBatchNorm(history_cat_conv_out_channels),\n            nn.ReLU(inplace=True))\n\n\n\n    def with_specific_component(self, component_name):\n        \"\"\"Whether the model owns a specific component\"\"\"\n        return getattr(self, component_name, None) is not None\n    \n    def image_encoder(self, img):\n        \"\"\"\n        Return (single_scale_context, multi_scale_context:[List])\n        \n        single scale_context are counsumed by forward projection\n        multi_scale_context are consumed by some perception heads like yolox\n        \"\"\"\n        imgs = img\n        B, N, C, imH, imW = imgs.shape\n        imgs = imgs.view(B * N, C, imH, imW)\n        if self.use_grid_mask:\n            imgs = self.grid_mask(imgs)\n        x = self.img_backbone(imgs)\n        \n        if self.with_img_neck:\n            x_list = self.img_neck(x)\n            if type(x_list) in [list, tuple]:\n                x_list = list(x_list)\n                for i, x in enumerate(x_list):\n                    _, output_dim, ouput_H, output_W = x.shape\n                    x_list[i] = x.view(B, N, output_dim, ouput_H, output_W)\n                return x_list[1], x_list\n            else:\n                _, output_dim, ouput_H, output_W = x_list.shape\n                return x_list.view(B, N, output_dim, ouput_H, output_W), [x_list.view(B, N, output_dim, ouput_H, output_W)]\n        \n\n    @force_fp32()\n    def bev_encoder(self, x):\n        if self.with_specific_component('img_bev_encoder_backbone'):\n            x = self.img_bev_encoder_backbone(x)\n        \n        if self.with_specific_component('img_bev_encoder_neck'):\n            x = self.img_bev_encoder_neck(x)\n        \n        if type(x) not in [list, tuple]:\n             x = [x]\n\n        return x\n    \n    @force_fp32()\n    def fuse_history(self, curr_bev, img_metas, bda): # align features with 3d shift\n\n        if curr_bev is None: return None\n        voxel_feat = True  if len(curr_bev.shape) == 5 else False\n        if voxel_feat:\n            curr_bev = curr_bev.permute(0, 1, 4, 2, 3) # n, c, z, h, w\n        \n        seq_ids = torch.LongTensor([\n            single_img_metas['sequence_group_idx'] \n            for single_img_metas in img_metas]).to(curr_bev.device)\n        start_of_sequence = torch.BoolTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(curr_bev.device)\n        forward_augs = generate_forward_transformation_matrix(bda)\n      \n        # print('sqe_ids', seq_ids, ' start_of_sequence ', start_of_sequence.tolist(), ' index ', img_metas[0]['index'], img_metas[0]['scene_name'])\n\n        curr_to_prev_ego_rt = 
torch.stack([\n            single_img_metas['curr_to_prev_ego_rt']\n            for single_img_metas in img_metas]).to(curr_bev)\n\n        if not self.align_prev_bev:\n            curr_to_prev_ego_rt= torch.eye(4).repeat(curr_to_prev_ego_rt.size(0), 1, 1).to(curr_bev)\n\n        ## Deal with first batch\n        if self.history_bev is None:\n            self.history_bev = curr_bev.clone()\n            self.history_seq_ids = seq_ids.clone()\n            self.history_forward_augs = forward_augs.clone()\n\n            # Repeat the first frame feature to be history\n            if voxel_feat:\n                self.history_bev = curr_bev.repeat(1, self.history_cat_num, 1, 1, 1) \n            else:\n                self.history_bev = curr_bev.repeat(1, self.history_cat_num, 1, 1)\n            # All 0s, representing current timestep.\n            self.history_sweep_time = curr_bev.new_zeros(curr_bev.shape[0], self.history_cat_num)\n\n\n        self.history_bev = self.history_bev.detach()\n\n        assert self.history_bev.dtype == torch.float32\n\n        ## Deal with the new sequences\n        # First, sanity check. For every non-start of sequence, history id and seq id should be same.\n\n        assert (self.history_seq_ids != seq_ids)[~start_of_sequence].sum() == 0, \\\n                \"{}, {}, {}\".format(self.history_seq_ids, seq_ids, start_of_sequence)\n\n        ## Replace all the new sequences' positions in history with the curr_bev information\n        self.history_sweep_time += 1 # new timestep, everything in history gets pushed back one.\n        if start_of_sequence.sum()>0:\n            if voxel_feat:    \n                self.history_bev[start_of_sequence] = curr_bev[start_of_sequence].repeat(1, self.history_cat_num, 1, 1, 1)\n            else:\n                self.history_bev[start_of_sequence] = curr_bev[start_of_sequence].repeat(1, self.history_cat_num, 1, 1)\n            \n            self.history_sweep_time[start_of_sequence] = 0 # zero the new sequence timestep starts\n            self.history_seq_ids[start_of_sequence] = seq_ids[start_of_sequence]\n            self.history_forward_augs[start_of_sequence] = forward_augs[start_of_sequence]\n\n        ## Get grid idxs & grid2bev first.\n        if voxel_feat:\n            n, c_, z, h, w = curr_bev.shape\n        else:\n            n, c_, h, w = curr_bev.shape\n            z = 1\n\n        # Generate grid\n        xs = torch.linspace(0, w - 1, w, dtype=curr_bev.dtype, device=curr_bev.device).view(1, w, 1).expand(h, w, z)\n        ys = torch.linspace(0, h - 1, h, dtype=curr_bev.dtype, device=curr_bev.device).view(h, 1, 1).expand(h, w, z)\n        zs = torch.linspace(0, z - 1, z, dtype=curr_bev.dtype, device=curr_bev.device).view(1, 1, z).expand(h, w, z)\n        grid = torch.stack(\n            (xs, ys,  zs, torch.ones_like(xs)), -1).view(1, h, w, z, 4).expand(n, h, w, z, 4).view(n, h, w, z, 4, 1)\n\n        # This converts BEV indices to meters\n        # IMPORTANT: the feat2bev[0, 3] is changed from feat2bev[0, 2] because previous was 2D rotation\n        # which has 2-th index as the hom index. 
Now, with 3D hom, 3-th is hom\n        feat2bev = torch.zeros((4,4),dtype=grid.dtype).to(grid)\n        feat2bev[0, 0] = self.forward_projection.dx[0]\n        feat2bev[1, 1] = self.forward_projection.dx[1]\n        feat2bev[2, 2] = self.forward_projection.dx[2]\n        feat2bev[0, 3] = self.forward_projection.bx[0] - self.forward_projection.dx[0] / 2.\n        feat2bev[1, 3] = self.forward_projection.bx[1] - self.forward_projection.dx[1] / 2.\n        feat2bev[2, 3] = self.forward_projection.bx[2] - self.forward_projection.dx[2] / 2.\n        feat2bev[3, 3] = 1\n        feat2bev = feat2bev.view(1,4,4)\n        \n        ## Get flow for grid sampling.\n        # The flow is as follows. Starting from grid locations in curr bev, transform to BEV XY11,\n        # backward of current augmentations, curr lidar to prev lidar, forward of previous augmentations,\n        # transform to previous grid locations.\n        rt_flow = (torch.inverse(feat2bev) @ self.history_forward_augs @ curr_to_prev_ego_rt\n                   @ torch.inverse(forward_augs) @ feat2bev)\n        grid = rt_flow.view(n, 1, 1, 1, 4, 4) @ grid\n        \n\n        # normalize and sample\n        if voxel_feat:\n            normalize_factor = torch.tensor([w - 1.0, h - 1.0, z - 1.0], dtype=curr_bev.dtype, device=curr_bev.device)\n            grid = grid[:,:,:,:, :3,0] / normalize_factor.view(1, 1, 1, 1, 3) * 2.0 - 1.0\n        else:\n            normalize_factor = torch.tensor([w - 1.0, h - 1.0], dtype=curr_bev.dtype, device=curr_bev.device)\n            grid = grid[:,:,:,:, :2,0] / normalize_factor.view(1, 1, 1, 1, 2) * 2.0 - 1.0           \n\n        tmp_bev = self.history_bev\n        if voxel_feat: \n            n, mc, z, h, w = tmp_bev.shape\n            tmp_bev = tmp_bev.reshape(n, mc, z, h, w)\n            grid = grid.to(curr_bev.dtype).permute(0, 3, 1, 2, 4)\n        else:\n            grid = grid.to(curr_bev.dtype).squeeze(-2)\n\n        # save_tensor(tmp_bev[0].clamp(min=-1, max=1).reshape(4, 80, 128, 128).abs().mean(1), f'curr_{self.count}_pre.png')\n        sampled_history_bev = F.grid_sample(tmp_bev, grid, align_corners=True, mode=self.interpolation_mode)\n        # save_tensor(sampled_history_bev[0].clamp(min=-1, max=1).reshape(4, 80, 128, 128).abs().mean(1), f'curr_{self.count}_after.png')\n        # save_tensor(curr_bev.clamp(min=-1, max=1).abs().mean(1), f'curr_{self.count}.png')\n        # self.count += 1\n        # if self.count == 10:\n\n        ## Update history\n        # Add in current frame to features & timestep\n        self.history_sweep_time = torch.cat(\n            [self.history_sweep_time.new_zeros(self.history_sweep_time.shape[0], 1), self.history_sweep_time],\n            dim=1) # B x (1 + T)\n\n        if voxel_feat:\n            sampled_history_bev = sampled_history_bev.reshape(n, mc, z, h, w)\n            curr_bev = curr_bev.reshape(n, c_, z, h, w)\n        feats_cat = torch.cat([curr_bev, sampled_history_bev], dim=1) # B x (1 + T) * 80 x H x W or B x (1 + T) * 80 xZ x H x W \n\n        # Reshape and concatenate features and timestep\n        feats_to_return = feats_cat.reshape(\n                feats_cat.shape[0], self.history_cat_num + 1, self.single_bev_num_channels, *feats_cat.shape[2:]) # B x (1 + T) x 80 x H x W\n        if voxel_feat:\n            feats_to_return = torch.cat(\n            [feats_to_return, self.history_sweep_time[:, :, None, None, None, None].repeat(\n                1, 1, 1, *feats_to_return.shape[3:]) * self.history_cam_sweep_freq\n            ], dim=2) # B x (1 + 
T) x 81 x Z x H x W\n        else:\n            feats_to_return = torch.cat(\n            [feats_to_return, self.history_sweep_time[:, :, None, None, None].repeat(\n                1, 1, 1, feats_to_return.shape[3], feats_to_return.shape[4]) * self.history_cam_sweep_freq\n            ], dim=2) # B x (1 + T) x 81 x H x W\n\n        # Time conv\n        feats_to_return = self.history_keyframe_time_conv(\n            feats_to_return.reshape(-1, *feats_to_return.shape[2:])).reshape(\n                feats_to_return.shape[0], feats_to_return.shape[1], -1, *feats_to_return.shape[3:]) # B x (1 + T) x 80 xZ x H x W\n\n        # Cat keyframes & conv\n        feats_to_return = self.history_keyframe_cat_conv(\n            feats_to_return.reshape(\n                feats_to_return.shape[0], -1, *feats_to_return.shape[3:])) # B x C x H x W or B x C x Z x H x W\n        \n        self.history_bev = feats_cat[:, :-self.single_bev_num_channels, ...].detach().clone()\n        self.history_sweep_time = self.history_sweep_time[:, :-1]\n        self.history_forward_augs = forward_augs.clone()\n        if voxel_feat:\n            feats_to_return = feats_to_return.permute(0, 1, 3, 4, 2)\n        if not self.do_history:\n            self.history_bev = None\n        return feats_to_return.clone()\n\n\n    def extract_img_bev_feat(self, img, img_metas, **kwargs):\n        \"\"\"Extract features of images.\"\"\"\n\n        return_map = {}\n\n        context, mlvl_context = self.image_encoder(img[0])\n\n        cam_params = img[1:7]\n        if self.with_specific_component('depth_net'):\n            mlp_input = self.depth_net.get_mlp_input(*cam_params)\n            context, depth = self.depth_net(context, mlp_input)\n        else:\n            depth=None\n                \n\n        if self.with_specific_component('forward_projection'):\n            bev_feat = self.forward_projection(cam_params, context, depth, **kwargs)\n        else:\n            bev_feat = None\n        \n        if self.with_specific_component('frpn'): # not used in FB-OCC\n            assert bev_feat is not None\n            bev_mask_logit = self.frpn(bev_feat)\n            bev_mask = bev_mask_logit.sigmoid() > self.frpn.mask_thre            \n            if bev_mask.requires_grad: # during training phase\n                gt_bev_mask = kwargs['gt_bev_mask'].to(torch.bool)\n                bev_mask = gt_bev_mask | bev_mask\n            return_map['bev_mask_logit'] = bev_mask_logit    \n        else:\n            bev_mask = None\n\n        if self.with_specific_component('backward_projection'):\n            bev_feat_refined = self.backward_projection([context],\n                                        img_metas,\n                                        lss_bev=bev_feat.mean(-1),\n                                        cam_params=cam_params,\n                                        bev_mask=bev_mask,\n                                        gt_bboxes_3d=None, # debug\n                                        pred_img_depth=depth)  \n                                        \n            if self.add_forward_backbward_feats:\n                bev_feat = bev_feat_refined[..., None] + bev_feat\n            else:\n                bev_feat = bev_feat_refined\n\n        # Fuse History\n        if self.fuse_history_bev:\n            bev_feat = self.fuse_history(bev_feat, img_metas, img[6])\n\n        if self.with_ego_status:\n            can_bus_info = torch.cat(kwargs['can_bus_info'])\n            bev_feat = bev_feat + self.can_bus_mlp(can_bus_info)[:, :, 
None, None]\n\n        bev_feat = self.bev_encoder(bev_feat)\n        \n        \n        return_map['context'] = mlvl_context if self.yolox_use_ml_feats else context\n        return_map['depth'] = depth\n        return_map['cam_params'] = cam_params\n        return_map['img_bev_feat'] = bev_feat\n\n        return return_map\n\n    def extract_lidar_bev_feat(self, pts, img_feats, img_metas):\n        \"\"\"Extract features of points.\"\"\"\n\n        voxels, num_points, coors = self.voxelize(pts)\n\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)\n        batch_size = coors[-1, 0] + 1\n        bev_feat = self.pts_middle_encoder(voxel_features, coors, batch_size)\n        bev_feat = self.pts_backbone(bev_feat)\n        if self.with_pts_neck:\n            bev_feat = self.pts_neck(bev_feat)\n        bev_feat = self.bev_encoder(bev_feat)\n        return dict(lidar_bev_feat=bev_feat)\n\n    def extract_feat(self, points, img, img_metas, **kwargs):\n        \"\"\"Extract features from images and points.\"\"\"\n        results={}\n        if img is not None and self.with_specific_component('image_encoder'):\n            results.update(self.extract_img_bev_feat(img, img_metas, **kwargs))\n        if points is not None and self.with_specific_component('pts_voxel_encoder'):\n            results.update(self.extract_lidar_bev_feat(points, img, img_metas))\n\n        return results\n\n    def forward_train(self,\n                      points=None,\n                      img_metas=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      gt_labels=None,\n                      gt_bboxes=None,\n                      img_inputs=None,\n                      proposals=None,\n                      gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"Forward training function.\n\n        Args:\n            points (list[torch.Tensor], optional): Points of each sample.\n                Defaults to None.\n            img_metas (list[dict], optional): Meta information of each sample.\n                Defaults to None.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth 3D boxes. Defaults to None.\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\n                of 3D boxes. Defaults to None.\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\n                of 2D boxes in images. Defaults to None.\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\n                images. Defaults to None.\n            img (torch.Tensor optional): Images of each sample with shape\n                (N, C, H, W). Defaults to None.\n            proposals ([list[torch.Tensor], optional): Predicted proposals\n                used for training Fast RCNN. Defaults to None.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                2D boxes in images to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of different branches.\n        \"\"\"\n\n        results= self.extract_feat(\n            points, img=img_inputs, img_metas=img_metas, **kwargs)\n        losses = dict()\n\n        if self.with_pts_bbox:\n            preds_agent_dicts = self.pts_bbox_head(results, img_metas,  gt_bboxes_3d, gt_labels_3d)\n            losses_pts, agent_instances = self.pts_bbox_head.loss(gt_bboxes_3d,\n                                            gt_labels_3d, preds_agent_dicts, img_metas)\n            losses.update(losses_pts)\n        \n        if self.with_specific_component('img_det_2d_head'):\n            if type(results['context']) not in [list, tuple]:\n                context = [results['context']]\n            else: context = results['context']\n            preds_2ddet_dicts = self.img_det_2d_head(context)\n            losses.update(\n                self.img_det_2d_head.loss(\n                    kwargs['gt_bboxes_2d'],\n                    kwargs['gt_labels_2d'],\n                    kwargs['centers2d'],\n                    preds_2ddet_dicts,\n                    kwargs['depths2d'],\n                    img_metas, #len=B\n                )\n            )  \n\n        if self.with_specific_component('occupancy_head'):\n            losses_occupancy = self.occupancy_head.forward_train(results['img_bev_feat'], results=results, gt_occupancy=kwargs['gt_occupancy'], gt_occupancy_flow=kwargs['gt_occupancy_flow'])\n            losses.update(losses_occupancy)\n\n        if self.with_specific_component('map_head'):\n            loss_map_dict, preds_map_dicts = self.map_head.forward(results, img_metas,  kwargs['map_gt_bboxes_3d'], kwargs['map_gt_labels_3d'], return_loss=True)\n            losses.update(loss_map_dict)\n        else: preds_map_dicts = [None] # dummy\n\n        if self.with_specific_component('frpn'):\n            losses_mask = self.frpn.get_bev_mask_loss(kwargs['gt_bev_mask'], results['bev_mask_logit'])\n            losses.update(losses_mask)\n\n        if self.use_depth_supervision and self.with_specific_component('depth_net'):\n            loss_depth = self.depth_net.get_depth_loss(kwargs['gt_depth'], results['depth'])\n            losses.update(loss_depth)\n\n        if self.with_specific_component('motion_head'):\n            preds_motion_dicts = self.motion_head(\n                agent_instances,\n                preds_map_dicts[-1],\n                gt_ego_lcf_feat = kwargs['gt_ego_lcf_feat'],\n                gt_ego_fut_cmd = kwargs['gt_ego_fut_cmd'],\n                gt_ego_his_traj = kwargs['gt_ego_his_trajs'],\n                gt_ego_fut_trajs = kwargs['gt_ego_fut_trajs'],\n                img_metas=img_metas,\n            )\n            losses.update(\n                self.motion_head.loss(\n                    gt_agent_fut_traj = kwargs['gt_agent_fut_traj'],\n                    gt_agent_fut_traj_mask = kwargs['gt_agent_fut_traj_mask'],\n                    gt_ego_fut_cmd = kwargs['gt_ego_fut_cmd'],\n                    gt_ego_fut_trajs = kwargs['gt_ego_fut_trajs'],\n                    gt_ego_fut_masks = kwargs['gt_ego_fut_masks'],\n                    preds_dicts = preds_motion_dicts,\n                    preds_map_dicts =  preds_map_dicts[-1],\n                    matched_gt_idxes = agent_instances.matched_gt_idxes,\n                    img_metas = img_metas,\n                )\n            )\n        \n        if self.with_specific_component('planner_head'):\n            preds_plan_dicts = self.planner_head(\n      
          results,\n                kwargs['gt_ego_lcf_feat'],\n                kwargs['gt_ego_fut_cmd'],\n                kwargs['gt_ego_his_trajs'],\n                kwargs['gt_ego_fut_trajs'],\n                img_metas=img_metas,\n                map_results=preds_map_dicts[-1]\n            )\n            losses.update(\n                self.planner_head.loss(\n                    kwargs['gt_ego_fut_trajs'],\n                    kwargs['gt_ego_fut_cmd'],\n                    kwargs['gt_ego_fut_masks'],\n                    preds_plan_dicts,\n                    img_metas,\n                )\n            )\n        \n        return losses\n\n    def forward_test(self,\n                     points=None,\n                     img_metas=None,\n                     img_inputs=None,\n                     **kwargs):\n        \"\"\"\n        Args:\n            points (list[torch.Tensor]): the outer list indicates test-time\n                augmentations and inner torch.Tensor should have a shape NxC,\n                which contains all points in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch\n            img (list[torch.Tensor], optional): the outer\n                list indicates test-time augmentations and inner\n                torch.Tensor should have a shape NxCxHxW, which contains\n                all images in the batch. Defaults to None.\n        \"\"\"\n        self.do_history = True\n        kwargs['can_bus_info'] = kwargs.get('can_bus_info', [None])[0]\n        if img_inputs is not None:\n            for var, name in [(img_inputs, 'img_inputs'),\n                          (img_metas, 'img_metas')]:\n                if not isinstance(var, list) :\n                    raise TypeError('{} must be a list, but got {}'.format(\n                        name, type(var)))        \n            num_augs = len(img_inputs)\n            if num_augs != len(img_metas):\n                raise ValueError(\n                    'num of augmentations ({}) != num of image meta ({})'.format(\n                        len(img_inputs), len(img_metas)))\n\n            if num_augs==1 and not img_metas[0][0].get('tta_config', dict(dist_tta=False))['dist_tta']:\n                return self.simple_test(points[0], img_metas[0], img_inputs[0],\n                                    **kwargs)\n            else:\n                return self.aug_test(points, img_metas, img_inputs, **kwargs)\n        \n        elif points is not None:\n            img_inputs = [img_inputs] if img_inputs is None else img_inputs\n            points = [points] if points is None else points\n            return self.simple_test(points[0], img_metas[0], img_inputs[0],\n                                    **kwargs)\n        \n    def aug_test(self,points,\n                    img_metas,\n                    img_inputs=None,\n                    visible_mask=[None],\n                    **kwargs):\n        \"\"\"Test function without augmentaiton.\"\"\"\n        assert False\n        return None\n\n    def simple_test(self,\n                    points,\n                    img_metas,\n                    img=None,\n                    rescale=False,\n                    visible_mask=[None],\n                    return_raw_occ=False,\n                    **kwargs):\n        \"\"\"Test function without augmentaiton.\"\"\"\n        results = self.extract_feat(\n            points, img=img, 
img_metas=img_metas, **kwargs)\n        \n        output_list = [dict() for _ in range(len(img_metas))]\n        \n        if  self.with_pts_bbox:\n            if getattr(self.pts_bbox_head, 'tracking', False):\n                preds_det_dicts, agent_instances = self.pts_bbox_head.forward_tracking(results, img_metas)\n            else:\n                preds_det_dicts = self.pts_bbox_head(results, img_metas)\n            pred_bbox = self.pts_bbox_head.get_bboxes(preds_det_dicts, img_metas, rescale=rescale)\n            pred_bbox[0]['index'] =  img_metas[0]['index']\n        else:\n            pred_bbox = [None for _ in range(len(img_metas))]\n\n        if self.with_specific_component('map_head'):\n            preds_map_dicts = self.map_head(results,\n             img_metas,\n             return_loss=False,\n             map_gt_bboxes_3d = kwargs.get('map_gt_bboxes_3d', None),\n             map_gt_labels_3d = kwargs.get('map_gt_labels_3d', None),\n             )\n            pred_map = self.map_head.get_bboxes(preds_map_dicts, img_metas)\n            pred_map[0]['index'] =  img_metas[0]['index']\n        else:\n            preds_map_dicts = [None] # dummy\n            pred_map = [None for _ in range(len(img_metas))]\n\n        if self.with_specific_component('motion_head'):\n            preds_motion_dicts = self.motion_head(\n                agent_instances,\n                preds_map_dicts[-1],\n                gt_ego_lcf_feat = kwargs['gt_ego_lcf_feat'][0],\n                gt_ego_fut_cmd = kwargs['gt_ego_fut_cmd'][0],\n                gt_ego_his_traj = kwargs['gt_ego_his_trajs'][0],\n                gt_ego_fut_trajs = kwargs['gt_ego_fut_trajs'][0],\n                img_metas=img_metas,\n            )\n            pred_motion = self.motion_head.get_motion(preds_motion_dicts, img_metas)\n            pred_motion[0]['index'] =  img_metas[0]['index']\n            pred_traj = self.motion_head.get_traj(\n                preds_motion_dicts,\n                img_metas,\n                gt_ego_fut_trajs=kwargs['gt_ego_fut_trajs'][0],\n                gt_ego_fut_cmd=kwargs['gt_ego_fut_cmd'][0],\n                gt_ego_fut_masks=kwargs['gt_ego_fut_masks'][0],\n                gt_fut_segmentations=kwargs['gt_fut_segmentations'][0],\n                gt_fut_segmentations_plus=kwargs['gt_fut_segmentations_plus'][0],\n                # vad_ego_fut_trajs=kwargs['vad_ego_fut_trajs'][0],\n             )\n            pred_traj[0]['index'] =  img_metas[0]['index']\n\n            # add motion traj to tracking results\n            num_bbox = pred_bbox[0]['track_scores'].size(0)\n            motion_info = np.zeros([num_bbox, 6, 8, 2])\n            motion_cls = np.zeros([num_bbox, 6])\n            for i, obj_idx in enumerate(pred_motion[0]['obj_idxes']):\n                try:\n                    bbox_ind = (pred_bbox[0]['obj_idxes']==obj_idx).nonzero().item()\n                except:\n                    continue\n                motion_info[bbox_ind] = pred_motion[0]['fut_trajs_in_global'][i]\n                motion_cls[bbox_ind] = pred_motion[0]['pred_traj_cls'][i]\n            pred_bbox[0]['motion_traj'] = motion_info\n            pred_bbox[0]['motion_cls'] = motion_cls\n\n        else:\n            pred_motion = [None for _ in range(len(img_metas))]\n            pred_traj = [None for _ in range(len(img_metas))]\n\n\n        if self.with_specific_component('occupancy_head'):\n            pred_occupancy = self.occupancy_head(results['img_bev_feat'], results=results, **kwargs)['output_voxels'][0]\n\n            
pred_occupancy = pred_occupancy.permute(0, 2, 3, 4, 1)[0]\n            if self.fix_void:\n                pred_occupancy = pred_occupancy[..., 1:]     \n            pred_occupancy = pred_occupancy.softmax(-1)\n\n\n            # convert to CVPR2023 Format\n            pred_occupancy = pred_occupancy.permute(3, 2, 0, 1)\n            pred_occupancy = torch.flip(pred_occupancy, [2])\n            pred_occupancy = torch.rot90(pred_occupancy, -1, [2, 3])\n            pred_occupancy = pred_occupancy.permute(2, 3, 1, 0)\n            \n            if return_raw_occ:\n                pred_occupancy_category = pred_occupancy\n            else:\n                pred_occupancy_category = pred_occupancy.argmax(-1) \n            \n\n            # # do not change the order\n            # if self.occupancy_save_path is not None:\n            #     scene_name = img_metas[0]['scene_name']\n            #     sample_token = img_metas[0]['sample_idx']\n            #     mask_camera = visible_mask[0][0]\n            #     masked_pred_occupancy = pred_occupancy[mask_camera].cpu().numpy()\n            #     save_path = os.path.join(self.occupancy_save_path, 'occupancy_pred', scene_name+'_'+sample_token)\n            #     np.savez_compressed(save_path, pred=masked_pred_occupancy, sample_token=sample_token) \n\n            # For test server\n            if self.occupancy_save_path is not None:\n                    scene_name = img_metas[0]['scene_name']\n                    sample_token = img_metas[0]['sample_idx']\n                    # mask_camera = visible_mask[0][0]\n                    # masked_pred_occupancy = pred_occupancy[mask_camera].cpu().numpy()\n                    save_pred_occupancy = pred_occupancy.argmax(-1).cpu().numpy()\n                    save_path = os.path.join(self.occupancy_save_path, 'occupancy_pred', f'{sample_token}.npz')\n                    np.savez_compressed(save_path, save_pred_occupancy.astype(np.uint8)) \n\n            pred_occupancy_category= pred_occupancy_category.cpu().numpy()\n\n        else:\n            pred_occupancy_category =  None\n\n        if self.with_specific_component('planner_head'):\n            preds_dicts = self.planner_head(\n                results,\n                kwargs['gt_ego_lcf_feat'][0],\n                kwargs['gt_ego_fut_cmd'][0],\n                kwargs['gt_ego_his_trajs'][0],\n                kwargs['gt_ego_fut_trajs'][0],\n                img_metas=img_metas,\n                map_results=preds_map_dicts[-1]\n                )\n            pred_traj = self.planner_head.get_bboxes(preds_dicts, img_metas, gt_ego_fut_trajs=kwargs['gt_ego_fut_trajs'][0],\n             gt_ego_fut_cmd=kwargs['gt_ego_fut_cmd'][0], gt_ego_fut_masks=kwargs['gt_ego_fut_masks'][0], gt_fut_segmentations=kwargs['gt_fut_segmentations'][0],\n             gt_fut_segmentations_plus=kwargs['gt_fut_segmentations_plus'][0],\n             # vad_ego_fut_trajs=kwargs['vad_ego_fut_trajs'][0],\n             )\n            pred_traj[0]['index'] =  img_metas[0]['index']\n        else:\n            pred_traj = [None for _ in range(len(img_metas))]\n        # if results.get('bev_mask_logit', None) is not None:\n        #     pred_bev_mask = results['bev_mask_logit'].sigmoid() > 0.5\n\n        assert len(img_metas) == 1\n        for i, result_dict in enumerate(output_list):\n            result_dict['pts_bbox'] = pred_bbox[i]\n            result_dict['pred_map'] = pred_map[i]\n            result_dict['pred_motion'] = pred_motion[i]\n            result_dict['pred_ego_traj'] = pred_traj[i]\n       
     result_dict['pred_occupancy'] = pred_occupancy_category\n            result_dict['index'] = img_metas[i]['index']\n\n        # if not self.training:\n        #     self.visual_sample(output_list, **kwargs)\n        \n        return output_list\n\n\n    def forward_dummy(self,\n                      points=None,\n                      img_metas=None,\n                      img_inputs=None,\n                      **kwargs):\n        results = self.extract_feat(\n            points, img=img_inputs, img_metas=img_metas, **kwargs)\n        assert self.with_pts_bbox\n        outs = self.pts_bbox_head(results['img_bev_feat'])\n        return outs\n\n    def world2bev_vis(self, x, y):\n             return int((x + 51.2) * 5), int((y + 51.2) * 5)\n\n    def visual_sample(self, results, **kwargs):\n        \n        import cv2\n        # upper image is gt\n        bev_img = np.ones([1024, 512, 3], dtype=np.float32) * 255\n        bev_img = bev_img.astype(np.float32)\n\n        bev_img = cv2.circle(bev_img, self.world2bev_vis(0, 0), 5, (0, 255, 0), thickness=-1)\n        bev_img = cv2.circle(bev_img, self.world2bev_vis(0, 51.2 * 2), 5, (0, 255, 0), thickness=-1)\n\n        if results[0].get('pts_bbox') is not None:\n            bbox = results[0]['pts_bbox']['boxes_3d']\n            track_scores = results[0]['pts_bbox']['track_scores']\n            for i, corners in enumerate(bbox.corners[:, [4, 7, 3, 0], :2]):\n                if track_scores[i]<0.4: continue\n                corners = np.array([self.world2bev_vis(*corner) for corner in corners])\n                corners2 = np.array([(x, y+512) for (x, y) in corners])\n                \n                bev_img = cv2.circle(bev_img, corners[0], 1, (61, 102, 255))\n                bev_img = cv2.polylines(bev_img, pts=[corners], isClosed=True, color=(61, 102, 255), thickness=1)\n                \n                bev_img = cv2.circle(bev_img, corners2[0], 1, (61, 102, 255))\n                bev_img = cv2.polylines(bev_img, pts=[corners2], isClosed=True, color=(61, 102, 255), thickness=1)\n\n        if kwargs.get('gt_bboxes_3d', False):\n            gt_bboxes_3d = kwargs['gt_bboxes_3d'][0][0]\n            for i, corners in enumerate(gt_bboxes_3d.corners[:, [4, 7, 3, 0], :2]):\n                corners = np.array([self.world2bev_vis(*corner) for corner in corners])\n                bev_img = cv2.circle(bev_img, corners[0], 1, (61, 102, 255))\n                # bev_img = cv2.fillPoly(bev_img, [corners], (61, 102, 255))\n                bev_img = cv2.polylines(bev_img, pts=[corners], isClosed=True, color=(255, 102, 61), thickness=1)\n\n\n\n        if results[0].get('pred_ego_traj') is not None:\n            pred_ego_fut_trajs = results[0]['pred_ego_traj']['pred_ego_fut_trajs']\n            gt_ego_fut_trajs = results[0]['pred_ego_traj']['gt_ego_fut_trajs']\n\n            gt_ego_fut_trajs, colors = self._render_traj(gt_ego_fut_trajs.numpy())\n            points = np.array([self.world2bev_vis(*point) for point in gt_ego_fut_trajs])\n            for point, color in zip(points, colors):\n                bev_img = cv2.circle(bev_img, point, 1, color)\n\n            pred_ego_fut_trajs, colors = self._render_traj(pred_ego_fut_trajs.numpy(), colormap='autumn')\n            points = np.array([self.world2bev_vis(*point) for point in pred_ego_fut_trajs])\n            for point, color in zip(points, colors):\n                x,y = point\n                bev_img = cv2.circle(bev_img, (x, y+512), 1, color)\n\n\n        if kwargs.get('map_gt_bboxes_3d', False):\n        
    map_gt_bboxes_3d = kwargs['map_gt_bboxes_3d'][0][0]\n            map_gt_labels_3d = kwargs['map_gt_labels_3d'][0][0]\n            for k, line in enumerate(map_gt_bboxes_3d.fixed_num_sampled_points):\n                    label = map_gt_labels_3d[k]\n                    # line = (line[..., :2] - self.map_head.origin.cpu()) / self.map_head.roi_size.cpu()\n                    line = line.cpu().numpy()\n                    corners = np.array([self.world2bev_vis(*corner) for corner in line])\n                    corners = [each for each in corners if ((each>=0).all() & (each<512).all())]\n                    colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                    for i, corner in enumerate(corners[:-1]):\n                        bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 255))\n                        bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=colors[label], thickness=1)\n\n        if results[0].get('pred_map') is not None:       \n            for k, line in enumerate(results[0]['pred_map']['map_pts_3d']):\n                label = results[0]['pred_map']['map_labels_3d'][k]\n                # if label !=0: continue\n                score = results[0]['pred_map']['map_scores_3d'][k]\n                if score < 0.4: continue\n                line = line.cpu().numpy()\n                corners = np.array([self.world2bev_vis(*corner) for corner in line])\n                corners = [each for each in corners if ((each>=0).all() & (each<512).all())]\n                corners = [(x, y+512) for (x, y) in corners ]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 255))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=colors[label], thickness=1)\n\n\n\n        if kwargs.get('gt_agent_fut_traj', False):\n            gt_agent_fut_traj = kwargs['gt_agent_fut_traj'][0][0].cpu()\n            gt_agent_fut_traj_mask = kwargs['gt_agent_fut_traj_mask'][0][0].cpu()\n            centers = kwargs['gt_bboxes_3d'][0][0].center[..., :2].cpu()\n            tmp = torch.cat([centers[:, None], gt_agent_fut_traj], 1)\n            trajs = torch.cumsum(tmp, 1)[:, 1:]\n            for k, traj in enumerate(trajs):\n                traj = traj.cpu().numpy()\n                corners = np.array([self.world2bev_vis(*corner) for corner in traj])\n                center = np.array(self.world2bev_vis(*centers[k]))\n                corners = [each for each in corners if ((each>=0).all() & (each<1536).all())]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    if gt_agent_fut_traj_mask[k, i+1].sum()<2 or gt_agent_fut_traj_mask[k, i].sum()<2:\n                        continue\n                    if i == 0: \n                        bev_img = cv2.line(bev_img, center, corners[i], color=(123, 22, 187), thickness=1)\n                    # bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 32))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=(123, 22, 187), thickness=1)\n\n        \n        if results[0].get('pred_motion') is not None:       \n            \n            obj_idxes_list = results[0]['pts_bbox']['obj_idxes']\n            centers = results[0]['pts_bbox']['boxes_3d'].center[..., :2].cpu().numpy()\n            \n            # pred_agent_fut_trajs = 
results[0]['pred_motion']['pred_agent_fut_trajs']\n            pred_agent_fut_trajs2 = results[0]['pred_motion']['pred_agent_fut_trajs2']\n            motion_obj_idxes = results[0]['pred_motion']['obj_idxes']\n\n            for k, trajs in enumerate(pred_agent_fut_trajs2):\n                try:\n                    track_k = (obj_idxes_list==motion_obj_idxes[k]).nonzero()[0][0]\n                except:\n                    continue\n                if track_scores[track_k]<0.4: continue\n\n                traj_ind = results[0]['pred_motion']['pred_traj_cls'][k].argmax()\n                # for traj in trajs:\n                traj = trajs[traj_ind]\n                \n                corners = np.array([self.world2bev_vis(*corner) for corner in traj])\n                corners = np.array([(x, y+512) for (x, y) in corners])\n                center = np.array(self.world2bev_vis(*centers[track_k]))\n                center[-1] +=512\n                corners = [each for each in corners if ((each>=0).all() & (each<1536).all())]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    if i == 0: bev_img = cv2.line(bev_img, center, corners[i], color=(123, 22, 187), thickness=1)\n                    # bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 32))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=(22, 122, 187), thickness=1)\n\n        mmcv.imwrite(bev_img, f'bev_{results[0][\"index\"]}.png')\n\n    def _render_traj(self, future_traj, traj_score=1, colormap='winter', points_per_step=5, line_color=None, dot_color=None, dot_size=25):\n        total_steps = (len(future_traj)-1) * points_per_step + 1\n        dot_colors = matplotlib.colormaps[colormap](\n            np.linspace(0, 1, total_steps))[:, :3] * 255\n        dot_colors = dot_colors*traj_score + \\\n            (1-traj_score)*np.ones_like(dot_colors)\n        total_xy = np.zeros((total_steps, 2))\n        for i in range(total_steps-1):\n            unit_vec = future_traj[i//points_per_step +\n                                   1] - future_traj[i//points_per_step]\n            total_xy[i] = (i/points_per_step - i//points_per_step) * \\\n                unit_vec + future_traj[i//points_per_step]\n        total_xy[-1] = future_traj[-1]\n        return total_xy, dot_colors"
  },
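The `fuse_history` step above warps the cached BEV feature from the previous frame into the current ego frame with `F.grid_sample`, then concatenates it with the current BEV feature along channels. Below is a condensed, hedged sketch of that warp only, not the exact method above: it assumes LSS-style `dx`/`bx` grid parameters and a homogeneous current-to-previous ego transform, and omits the 4x4 voxel form and the augmentation matrices.

```python
# Minimal sketch (not the repository's exact fuse_history): warp a cached 2D BEV
# feature map from the previous frame into the current ego frame.
import torch
import torch.nn.functional as F

def warp_prev_bev(prev_bev, curr_to_prev_ego, dx, bx):
    """prev_bev: (B, C, H, W) cached BEV features from t-1.
    curr_to_prev_ego: (B, 3, 3) homogeneous transform, current ego -> previous ego (metric BEV coords).
    dx, bx: BEV cell size and the metric coordinate of cell 0, as in LSS-style view transformers.
    """
    b, c, h, w = prev_bev.shape

    # Grid index -> metric BEV coordinate (homogeneous 3x3), mirroring feat2bev above.
    feat2bev = torch.eye(3, dtype=prev_bev.dtype, device=prev_bev.device)
    feat2bev[0, 0], feat2bev[1, 1] = dx[0], dx[1]
    feat2bev[0, 2] = bx[0] - dx[0] / 2.
    feat2bev[1, 2] = bx[1] - dx[1] / 2.

    # Current grid index -> previous grid index (augmentation terms omitted).
    flow = torch.inverse(feat2bev) @ curr_to_prev_ego @ feat2bev  # (B, 3, 3)

    xs = torch.arange(w, dtype=prev_bev.dtype, device=prev_bev.device)
    ys = torch.arange(h, dtype=prev_bev.dtype, device=prev_bev.device)
    grid_y, grid_x = torch.meshgrid(ys, xs, indexing='ij')
    ones = torch.ones_like(grid_x)
    grid = torch.stack([grid_x, grid_y, ones], dim=-1).view(1, h, w, 3, 1)  # (1, H, W, 3, 1)

    prev_idx = (flow.reshape(b, 1, 1, 3, 3) @ grid)[..., :2, 0]  # (B, H, W, 2), x then y

    # Normalise to [-1, 1] for grid_sample (x indexes width, y indexes height).
    norm = torch.tensor([w - 1.0, h - 1.0], dtype=prev_bev.dtype, device=prev_bev.device)
    prev_idx = prev_idx / norm * 2.0 - 1.0
    return F.grid_sample(prev_bev, prev_idx, align_corners=True, mode='bilinear')
```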
  {
    "path": "mmdet3d/models/fbbev/heads/__init__.py",
    "content": "from .occupancy_head import OccHead\nfrom .yolox import YOLOXHeadCustom"
  },
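This `__init__.py` only needs to import the head modules so that their `@HEADS.register_module()` decorators run; after that, either head can be instantiated from a config dict through the registry. A minimal sketch, assuming the package is importable and using illustrative channel/class numbers rather than the repository's config values:

```python
# Importing the package registers OccHead / YOLOXHeadCustom with mmdet's HEADS registry.
from mmdet.models import HEADS
import mmdet3d.models.fbbev.heads  # noqa: F401

# Hypothetical config values, for illustration only.
occ_head_cfg = dict(
    type='OccHead',
    in_channels=[256],
    out_channel=18,
    num_level=1,
    balance_cls_weight=False,  # avoid relying on the class-frequency table here
)
occ_head = HEADS.build(occ_head_cfg)
```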
  {
    "path": "mmdet3d/models/fbbev/heads/occupancy_head.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\nimport copy\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmdet.core import reduce_mean\nfrom mmdet.models import HEADS\nfrom mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer\nfrom mmdet3d.models.fbbev.modules.occ_loss_utils import lovasz_softmax, CustomFocalLoss\nfrom mmdet3d.models.fbbev.modules.occ_loss_utils import nusc_class_frequencies, nusc_class_names\nfrom mmdet3d.models.fbbev.modules.occ_loss_utils import geo_scal_loss, sem_scal_loss, CE_ssc_loss\nfrom torch.utils.checkpoint import checkpoint as cp\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.cuda.amp import autocast\nfrom mmdet3d.models import builder\n\n@HEADS.register_module()\nclass OccHead(BaseModule):\n    def __init__(\n        self,\n        in_channels,\n        out_channel,\n        num_level=1,\n        soft_weights=False,\n        loss_weight_cfg=None,\n        conv_cfg=dict(type='Conv3d', bias=False),\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n        point_cloud_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],\n        final_occ_size=[256, 256, 20],\n        empty_idx=0,\n        balance_cls_weight=True,\n        train_cfg=None,\n        test_cfg=None,\n        with_cp=False,\n        use_focal_loss=False,\n        use_dice_loss= False,\n        use_deblock=True,\n    ):\n        super(OccHead, self).__init__()\n\n        self.fp16_enabled=False\n      \n        if type(in_channels) is not list:\n            in_channels = [in_channels]\n        self.with_cp = with_cp\n        self.use_deblock = use_deblock\n        self.use_focal_loss = use_focal_loss\n        if self.use_focal_loss:\n            self.focal_loss = builder.build_loss(dict(type='CustomFocalLoss'))\n        self.in_channels = in_channels\n        self.out_channel = out_channel\n        self.num_level = num_level\n        \n        self.point_cloud_range = torch.tensor(np.array(point_cloud_range)).float()\n\n        if loss_weight_cfg is None:\n            self.loss_weight_cfg = {\n                \"loss_voxel_ce_weight\": 1.0,\n                \"loss_voxel_sem_scal_weight\": 1.0,\n                \"loss_voxel_geo_scal_weight\": 1.0,\n                \"loss_voxel_lovasz_weight\": 1.0,\n            }\n        else:\n            self.loss_weight_cfg = loss_weight_cfg\n        \n        # voxel losses\n        self.loss_voxel_ce_weight = self.loss_weight_cfg.get('loss_voxel_ce_weight', 1.0)\n        self.loss_voxel_sem_scal_weight = self.loss_weight_cfg.get('loss_voxel_sem_scal_weight', 1.0)\n        self.loss_voxel_geo_scal_weight = self.loss_weight_cfg.get('loss_voxel_geo_scal_weight', 1.0)\n        self.loss_voxel_lovasz_weight = self.loss_weight_cfg.get('loss_voxel_lovasz_weight', 1.0)\n        \n\n\n        # voxel-level prediction\n        self.occ_convs = nn.ModuleList()\n        for i in range(self.num_level):\n            mid_channel = self.in_channels[i] // 2\n            occ_conv = nn.Sequential(\n                build_conv_layer(conv_cfg, in_channels=self.in_channels[i], \n                        out_channels=mid_channel, kernel_size=3, stride=1, padding=1),\n                build_norm_layer(norm_cfg, mid_channel)[1],\n                nn.ReLU(inplace=True))\n       
     self.occ_convs.append(occ_conv)\n\n\n        self.occ_pred_conv = nn.Sequential(\n                build_conv_layer(conv_cfg, in_channels=mid_channel, \n                        out_channels=mid_channel//2, kernel_size=1, stride=1, padding=0),\n                build_norm_layer(norm_cfg, mid_channel//2)[1],\n                nn.ReLU(inplace=True),\n                build_conv_layer(conv_cfg, in_channels=mid_channel//2, \n                        out_channels=out_channel, kernel_size=1, stride=1, padding=0))\n\n        self.soft_weights = soft_weights\n        self.num_point_sampling_feat = self.num_level + 1 * self.use_deblock\n        if self.soft_weights:\n            soft_in_channel = mid_channel\n            self.voxel_soft_weights = nn.Sequential(\n                build_conv_layer(conv_cfg, in_channels=soft_in_channel, \n                        out_channels=soft_in_channel//2, kernel_size=1, stride=1, padding=0),\n                build_norm_layer(norm_cfg, soft_in_channel//2)[1],\n                nn.ReLU(inplace=True),\n                build_conv_layer(conv_cfg, in_channels=soft_in_channel//2, \n                        out_channels=self.num_point_sampling_feat, kernel_size=1, stride=1, padding=0))\n            \n        # loss functions\n        self.use_dice_loss = use_dice_loss\n        if self.use_dice_loss:\n            self.dice_loss = builder.build_loss(dict(type='DiceLoss', loss_weight=2))\n\n        if balance_cls_weight:\n            if out_channel == 19:\n                self.class_weights = torch.from_numpy(1 / np.log(nusc_class_frequencies[:out_channel] + 0.001))\n                self.class_weights = torch.cat([torch.tensor([0]), self.class_weights])\n            else:\n                if out_channel == 17: nusc_class_frequencies[0] += nusc_class_frequencies[-1]\n                self.class_weights = torch.from_numpy(1 / np.log(nusc_class_frequencies[:out_channel] + 0.001))\n        else:\n            self.class_weights = torch.ones(out_channel)/out_channel  # FIXME hardcode 17\n\n        if self.use_deblock:\n            upsample_cfg=dict(type='deconv3d', bias=False)\n            upsample_layer = build_conv_layer(\n                    upsample_cfg,\n                    in_channels=self.in_channels[0],\n                    out_channels=self.in_channels[0]//2,\n                    kernel_size=2,\n                    stride=2,\n                    padding=0)\n\n            self.deblock = nn.Sequential(upsample_layer,\n                                    build_norm_layer(norm_cfg, self.in_channels[0]//2)[1],\n                                    nn.ReLU(inplace=True))\n\n\n        self.class_names = nusc_class_names    \n        self.empty_idx = empty_idx\n    \n    @force_fp32(apply_to=('voxel_feats')) \n    def forward_coarse_voxel(self, voxel_feats):\n        output_occs = []\n        output = {}\n\n        if self.use_deblock:\n            if self.with_cp and voxel_feats[0].requires_grad:\n                x0 = cp(self.deblock, voxel_feats[0])\n            else:\n                x0 = self.deblock(voxel_feats[0])\n            output_occs.append(x0)\n        for feats, occ_conv in zip(voxel_feats, self.occ_convs):\n            if self.with_cp  and feats.requires_grad:\n                x = cp(occ_conv, feats)\n            else:\n                x = occ_conv(feats)\n            output_occs.append(x)\n\n        if self.soft_weights:\n            voxel_soft_weights = self.voxel_soft_weights(output_occs[0])\n            voxel_soft_weights = torch.softmax(voxel_soft_weights, dim=1)\n 
       else:\n            voxel_soft_weights = torch.ones([output_occs[0].shape[0], self.num_point_sampling_feat, 1, 1, 1],).to(output_occs[0].device) / self.num_point_sampling_feat\n\n        out_voxel_feats = 0\n        _, _, H, W, D= output_occs[0].shape\n        for feats, weights in zip(output_occs, torch.unbind(voxel_soft_weights, dim=1)):\n            feats = F.interpolate(feats, size=[H, W, D], mode='trilinear', align_corners=False).contiguous()\n            out_voxel_feats += feats * weights.unsqueeze(1)\n        output['out_voxel_feats'] = [out_voxel_feats]\n        if self.with_cp and  out_voxel_feats.requires_grad:\n            out_voxel = cp(self.occ_pred_conv, out_voxel_feats)\n        else:\n            out_voxel = self.occ_pred_conv(out_voxel_feats)\n\n        output['occ'] = [out_voxel]\n\n        return output\n     \n    @force_fp32()\n    def forward(self, voxel_feats, img_feats=None, pts_feats=None, transform=None, **kwargs):\n        \n        assert type(voxel_feats) is list and len(voxel_feats) == self.num_level\n        \n        output = self.forward_coarse_voxel(voxel_feats)\n        out_voxel_feats = output['out_voxel_feats'][0]\n        coarse_occ = output['occ'][0]\n\n        res = {\n            'output_voxels': output['occ'],\n            'output_voxels_fine': output.get('fine_output', None),\n            'output_coords_fine': output.get('fine_coord', None),\n        }\n\n\n        return res\n    \n    @force_fp32()\n    def forward_train(self, voxel_feats, img_feats=None, pts_feats=None, transform=None, gt_occupancy=None, gt_occupancy_flow=None, **kwargs):\n        res = self.forward(voxel_feats, img_feats=img_feats, pts_feats=pts_feats, transform=transform, **kwargs)\n        loss = self.loss(target_voxels=gt_occupancy,\n            output_voxels = res['output_voxels'],\n            output_coords_fine=res['output_coords_fine'],\n            output_voxels_fine=res['output_voxels_fine'])\n\n        return loss\n\n\n    @force_fp32() \n    def loss_voxel(self, output_voxels, target_voxels, tag):\n\n        # resize gt                       \n        B, C, H, W, D = output_voxels.shape\n        ratio = target_voxels.shape[2] // H\n        if ratio != 1:\n            target_voxels = target_voxels.reshape(B, H, ratio, W, ratio, D, ratio).permute(0,1,3,5,2,4,6).reshape(B, H, W, D, ratio**3)\n            empty_mask = target_voxels.sum(-1) == self.empty_idx\n            target_voxels = target_voxels.to(torch.int64)\n            occ_space = target_voxels[~empty_mask]\n            occ_space[occ_space==0] = -torch.arange(len(occ_space[occ_space==0])).to(occ_space.device) - 1\n            target_voxels[~empty_mask] = occ_space\n            target_voxels = torch.mode(target_voxels, dim=-1)[0]\n            target_voxels[target_voxels<0] = 255\n            target_voxels = target_voxels.long()\n        \n        # output_voxels = torch.log(output_voxels * 0) + output_voxels/0 # debug !!!!!!!!\n\n        output_voxels[torch.isnan(output_voxels)] = 0\n        output_voxels[torch.isinf(output_voxels)] = 0\n        assert torch.isnan(output_voxels).sum().item() == 0\n        assert torch.isnan(target_voxels).sum().item() == 0\n\n        loss_dict = {}\n\n        # igore 255 = ignore noise. 
we keep the loss bascward for the label=0 (free voxels)\n        if self.use_focal_loss:\n            loss_dict['loss_voxel_ce_{}'.format(tag)] = self.loss_voxel_ce_weight * self.focal_loss(output_voxels, target_voxels, self.class_weights.type_as(output_voxels), ignore_index=255)\n        else:\n            loss_dict['loss_voxel_ce_{}'.format(tag)] = self.loss_voxel_ce_weight * CE_ssc_loss(output_voxels, target_voxels, self.class_weights.type_as(output_voxels), ignore_index=255)\n\n        loss_dict['loss_voxel_sem_scal_{}'.format(tag)] = self.loss_voxel_sem_scal_weight * sem_scal_loss(output_voxels, target_voxels, ignore_index=255)\n        loss_dict['loss_voxel_geo_scal_{}'.format(tag)] = self.loss_voxel_geo_scal_weight * geo_scal_loss(output_voxels, target_voxels, ignore_index=255, non_empty_idx=self.empty_idx)\n        loss_dict['loss_voxel_lovasz_{}'.format(tag)] = self.loss_voxel_lovasz_weight * lovasz_softmax(torch.softmax(output_voxels, dim=1), target_voxels, ignore=255)\n\n\n        if self.use_dice_loss:\n            visible_mask = target_voxels!=255\n            visible_pred_voxels = output_voxels.permute(0, 2, 3, 4, 1)[visible_mask]\n            visible_target_voxels = target_voxels[visible_mask]\n            visible_target_voxels = F.one_hot(visible_target_voxels.to(torch.long), 19)\n            loss_dict['loss_voxel_dice_{}'.format(tag)] = self.dice_loss(visible_pred_voxels, visible_target_voxels)\n\n        return loss_dict\n\n    @force_fp32() \n    def loss(self, output_voxels=None,\n                output_coords_fine=None, output_voxels_fine=None, \n                target_voxels=None, visible_mask=None, **kwargs):\n        loss_dict = {}\n        for index, output_voxel in enumerate(output_voxels):\n            loss_dict.update(self.loss_voxel(output_voxel, target_voxels,  tag='c_{}'.format(index)))\n        return loss_dict\n"
  },
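`OccHead.loss_voxel` above downsamples the ground-truth grid to the prediction resolution with a per-block majority vote in which free voxels are suppressed inside partially occupied blocks, so an occupied class wins whenever one is present; blocks where no occupied class reaches a plurality fall back to the ignore label. A standalone sketch of that pooling, with `empty_idx`/`ignore_idx` as assumed defaults:

```python
# Minimal sketch of the majority-vote label downsampling used in OccHead.loss_voxel.
import torch

def downsample_occ_labels(target, ratio, empty_idx=0, ignore_idx=255):
    """target: (B, H*r, W*r, D*r) integer labels; returns (B, H, W, D)."""
    B = target.shape[0]
    H, W, D = (target.shape[1] // ratio, target.shape[2] // ratio, target.shape[3] // ratio)
    t = target.reshape(B, H, ratio, W, ratio, D, ratio) \
              .permute(0, 1, 3, 5, 2, 4, 6).reshape(B, H, W, D, ratio ** 3).long()

    empty_block = (t == empty_idx).all(-1)  # blocks that are entirely free
    # Relabel every free voxel in a non-empty block with a unique negative id so
    # torch.mode can never pick "free" there.
    occ = t[~empty_block]
    free = occ == empty_idx
    n_free = int(free.sum())
    occ[free] = -torch.arange(n_free, device=t.device) - 1
    t[~empty_block] = occ

    out = torch.mode(t, dim=-1)[0]
    out[out < 0] = ignore_idx  # no occupied plurality -> ignored by the loss
    return out
```

The unique negative ids guarantee the mode either returns an occupied class or falls through to the ignore label, which is the same relabelling trick the head applies before computing its voxel losses.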
  {
    "path": "mmdet3d/models/fbbev/heads/yolox.py",
    "content": "import math\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,\n                      bias_init_with_prob)\nfrom mmcv.ops.nms import batched_nms\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (MlvlPointGenerator, bbox_xyxy_to_cxcywh,\n                        build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.builder import HEADS, build_loss\nfrom mmdet.models.dense_heads.base_dense_head import BaseDenseHead\nfrom mmdet.models.dense_heads.dense_test_mixins import BBoxTestMixin\n\n\n@HEADS.register_module()\nclass YOLOXHeadCustom(BaseDenseHead, BBoxTestMixin):\n    \"\"\"YOLOXHead head used in `YOLOX <https://arxiv.org/abs/2107.08430>`_.\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels in stacking convs.\n            Default: 256\n        stacked_convs (int): Number of stacking convs of the head.\n            Default: 2.\n        strides (tuple): Downsample factor of each feature map.\n        use_depthwise (bool): Whether to depthwise separable convolution in\n            blocks. Default: False\n        dcn_on_last_conv (bool): If true, use dcn in the last layer of\n            towers. Default: False.\n        conv_bias (bool | str): If specified as `auto`, it will be decided by\n            the norm_cfg. Bias of conv will be set as True if `norm_cfg` is\n            None, otherwise False. Default: \"auto\".\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        act_cfg (dict): Config dict for activation layer. 
Default: None.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_obj (dict): Config of objectness loss.\n        loss_l1 (dict): Config of L1 loss.\n        train_cfg (dict): Training config of anchor head.\n        test_cfg (dict): Testing config of anchor head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 feat_channels=256,\n                 stacked_convs=2,\n                 strides=[8, 16, 32],\n                 use_depthwise=False,\n                 dcn_on_last_conv=False,\n                 conv_bias='auto',\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     reduction='sum',\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='IoULoss',\n                     mode='square',\n                     eps=1e-16,\n                     reduction='sum',\n                     loss_weight=5.0),\n                 loss_obj=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     reduction='sum',\n                     loss_weight=1.0),\n                 loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0),\n                 loss_centers2d=dict(type='L1Loss', reduction='sum', loss_weight=1.0),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=dict(\n                     type='Kaiming',\n                     layer='Conv2d',\n                     a=math.sqrt(5),\n                     distribution='uniform',\n                     mode='fan_in',\n                     nonlinearity='leaky_relu')):\n\n        super().__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.cls_out_channels = num_classes\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.strides = strides\n        self.use_depthwise = use_depthwise\n        self.dcn_on_last_conv = dcn_on_last_conv\n        assert conv_bias == 'auto' or isinstance(conv_bias, bool)\n        self.conv_bias = conv_bias\n        self.use_sigmoid_cls = True\n\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_obj = build_loss(loss_obj)\n        self.loss_centers2d = build_loss(loss_centers2d)\n\n        self.use_l1 = True  # This flag will be modified by hooks.\n        self.loss_l1 = build_loss(loss_l1)\n\n        self.prior_generator = MlvlPointGenerator(strides, offset=0)\n\n        self.test_cfg = test_cfg\n        self.train_cfg = train_cfg\n\n        self.sampling = False\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # sampling=False so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n            self.sampler_ = build_sampler(sampler_cfg, context=self)\n\n        self.fp16_enabled = False\n        
self._init_layers()\n\n    def _init_layers(self):\n        self.multi_level_cls_convs = nn.ModuleList()\n        self.multi_level_reg_convs = nn.ModuleList()\n        self.multi_level_conv_cls = nn.ModuleList()\n        self.multi_level_conv_reg = nn.ModuleList()\n        self.multi_level_conv_obj = nn.ModuleList()\n        self.multi_level_conv_centers2d = nn.ModuleList()\n        for _ in self.strides:\n            self.multi_level_cls_convs.append(self._build_stacked_convs())\n            self.multi_level_reg_convs.append(self._build_stacked_convs())\n            conv_cls, conv_reg, conv_obj, conv_centers2d = self._build_predictor()\n            self.multi_level_conv_cls.append(conv_cls)\n            self.multi_level_conv_reg.append(conv_reg)\n            self.multi_level_conv_obj.append(conv_obj)\n            self.multi_level_conv_centers2d.append(conv_centers2d)\n\n    def _build_stacked_convs(self):\n        \"\"\"Initialize conv layers of a single level head.\"\"\"\n        conv = DepthwiseSeparableConvModule \\\n            if self.use_depthwise else ConvModule\n        stacked_convs = []\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            if self.dcn_on_last_conv and i == self.stacked_convs - 1:\n                conv_cfg = dict(type='DCNv2')\n            else:\n                conv_cfg = self.conv_cfg\n            stacked_convs.append(\n                conv(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    act_cfg=self.act_cfg,\n                    bias=self.conv_bias))\n        return nn.Sequential(*stacked_convs)\n\n    def _build_predictor(self):\n        \"\"\"Initialize predictor layers of a single level head.\"\"\"\n        conv_cls = nn.Conv2d(self.feat_channels, self.cls_out_channels, 1)\n        conv_reg = nn.Conv2d(self.feat_channels, 4, 1)\n        conv_obj = nn.Conv2d(self.feat_channels, 1, 1)\n        conv_centers2d = nn.Conv2d(self.feat_channels, 2, 1)\n        return conv_cls, conv_reg, conv_obj, conv_centers2d\n\n    def init_weights(self):\n        super(YOLOXHeadCustom, self).init_weights()\n        # Use prior in model initialization to improve stability\n        bias_init = bias_init_with_prob(0.01)\n        for conv_cls, conv_obj in zip(self.multi_level_conv_cls,\n                                      self.multi_level_conv_obj):\n            conv_cls.bias.data.fill_(bias_init)\n            conv_obj.bias.data.fill_(bias_init)\n\n    @force_fp32(apply_to=('x')) \n    def forward_single(self, x, cls_convs, reg_convs, conv_cls, conv_reg,\n                       conv_obj, conv_centers2d):\n        \"\"\"Forward feature of a single scale level.\"\"\"\n        \n        if x.dim() == 5:\n            bs, n, c, h, w= x.shape\n            x = x.reshape(bs*n, c, h, w)\n\n        cls_feat = cls_convs(x)\n        reg_feat = reg_convs(x)\n\n        cls_score = conv_cls(cls_feat)\n        bbox_pred = conv_reg(reg_feat)\n        objectness = conv_obj(reg_feat)\n        centers2d_offset = conv_centers2d(reg_feat)\n\n        return cls_score, bbox_pred, objectness, centers2d_offset\n\n    @force_fp32(apply_to=('feats')) \n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, 
each is\n                a 4D-tensor.\n        Returns:\n            tuple[Tensor]: A tuple of multi-level predication map, each is a\n                4D-tensor of shape (batch_size, 5+num_classes, height, width).\n        \"\"\"\n        # feats = data['img_feats']\n        cls_scores, bbox_preds, objectnesses, centers2d_offsets= multi_apply(self.forward_single, feats,\n                           self.multi_level_cls_convs,\n                           self.multi_level_reg_convs,\n                           self.multi_level_conv_cls,\n                           self.multi_level_conv_reg,\n                           self.multi_level_conv_obj,\n                           self.multi_level_conv_centers2d,\n                           )\n        out = {\n            'enc_cls_scores': cls_scores,\n            'enc_bbox_preds': bbox_preds,\n            'pred_centers2d_offset': centers2d_offsets,\n            'objectnesses':objectnesses,\n            'topk_indexes':None\n        }\n        return out\n\n    def _bbox_decode(self, priors, bbox_preds):\n        xys = (bbox_preds[..., :2] * priors[:, 2:]) + priors[:, :2]\n        whs = bbox_preds[..., 2:].exp() * priors[:, 2:]\n\n        tl_x = (xys[..., 0] - whs[..., 0] / 2)\n        tl_y = (xys[..., 1] - whs[..., 1] / 2)\n        br_x = (xys[..., 0] + whs[..., 0] / 2)\n        br_y = (xys[..., 1] + whs[..., 1] / 2)\n\n        decoded_bboxes = torch.stack([tl_x, tl_y, br_x, br_y], -1)\n        return decoded_bboxes\n    \n    def _centers2d_decode(self, priors, centers2d):\n        centers2d = (centers2d[..., :2] * priors[:, 2:]) + priors[:, :2]\n        return centers2d\n\n    def _bboxes_nms(self, cls_scores, bboxes, score_factor, cfg):\n        max_scores, labels = torch.max(cls_scores, 1)\n        valid_mask = score_factor * max_scores >= cfg.score_thr\n\n        bboxes = bboxes[valid_mask]\n        scores = max_scores[valid_mask] * score_factor[valid_mask]\n        labels = labels[valid_mask]\n\n        if labels.numel() == 0:\n            return bboxes, labels\n        else:\n            dets, keep = batched_nms(bboxes, scores, labels, cfg.nms)\n            return dets, labels[keep]\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'objectnesses', 'centers2d'))\n    def loss(self,\n             gt_bboxes2d_list,\n             gt_labels2d_list,\n             centers2d,\n             preds_dicts,\n             depths,\n             img_metas, #len=B\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.`\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_priors * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_priors * 4.\n            objectnesses (list[Tensor], Optional): Score factor for\n                all scale level, each is a 4D-tensor, has shape\n                (batch_size, 1, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the 
loss.\n        \"\"\"\n        cls_scores = preds_dicts['enc_cls_scores']\n        bbox_preds = preds_dicts['enc_bbox_preds']\n        objectnesses = preds_dicts['objectnesses']\n        centers2d_offset = preds_dicts['pred_centers2d_offset']\n        num_imgs = cls_scores[0].shape[0]\n        featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores]\n        mlvl_priors = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=cls_scores[0].dtype,\n            device=cls_scores[0].device,\n            with_stride=True)\n            \n        flatten_cls_preds = [\n            cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1,\n                                                 self.cls_out_channels)\n            for cls_pred in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_objectness = [\n            objectness.permute(0, 2, 3, 1).reshape(num_imgs, -1)\n            for objectness in objectnesses\n        ]\n        flatten_centers2d_offset = [\n            center2d_offset.permute(0, 2, 3, 1).reshape(num_imgs, -1, 2)\n            for center2d_offset in centers2d_offset\n        ]\n\n        flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1)\n        flatten_objectness = torch.cat(flatten_objectness, dim=1)\n        flatten_centers2d_offset = torch.cat(flatten_centers2d_offset, dim=1)\n        flatten_priors = torch.cat(mlvl_priors)\n        flatten_bboxes = self._bbox_decode(flatten_priors, flatten_bbox_preds)\n        device = cls_scores[0].device\n        gt_bboxes = [bboxes2d.to(device) for i in gt_bboxes2d_list for bboxes2d in i]\n        gt_labels = [labels2d.to(device) for i in gt_labels2d_list for labels2d in i]\n        centers2d = [center2d.to(device) for i in centers2d for center2d in i]\n\n        (pos_masks, cls_targets, obj_targets, bbox_targets, l1_targets, centers2d_target,\n         num_fg_imgs) = multi_apply(\n             self._get_target_single, flatten_cls_preds.detach(),\n             flatten_objectness.detach(),\n             flatten_priors.unsqueeze(0).repeat(num_imgs, 1, 1),\n             flatten_bboxes.detach(), gt_bboxes, gt_labels, centers2d)\n\n        # The experimental results show that ‘reduce_mean’ can improve\n        # performance on the COCO dataset.\n        num_pos = torch.tensor(\n            sum(num_fg_imgs),\n            dtype=torch.float,\n            device=flatten_cls_preds.device)\n        num_total_samples = max(reduce_mean(num_pos), 1.0)\n\n        pos_masks = torch.cat(pos_masks, 0)\n        cls_targets = torch.cat(cls_targets, 0)\n        obj_targets = torch.cat(obj_targets, 0)\n        bbox_targets = torch.cat(bbox_targets, 0)\n        if self.use_l1:\n            l1_targets = torch.cat(l1_targets, 0)\n        centers2d_target = torch.cat(centers2d_target, 0)\n\n        loss_bbox = self.loss_bbox(\n            flatten_bboxes.view(-1, 4)[pos_masks],\n            bbox_targets) / num_total_samples\n        loss_obj = self.loss_obj(flatten_objectness.view(-1, 1),\n                                 obj_targets) / num_total_samples\n        loss_cls = self.loss_cls(\n            flatten_cls_preds.view(-1, self.num_classes)[pos_masks],\n            cls_targets) / num_total_samples\n        loss_centers2d = self.loss_centers2d(\n            flatten_centers2d_offset.view(-1, 2)[pos_masks],\n            
centers2d_target) / num_total_samples\n\n        loss_dict = dict(\n            enc_loss_cls=loss_cls, enc_loss_iou=loss_bbox, enc_loss_obj=loss_obj, enc_loss_centers2d=loss_centers2d)\n\n        if self.use_l1:\n            loss_l1 = self.loss_l1(\n                flatten_bbox_preds.view(-1, 4)[pos_masks],\n                l1_targets) / num_total_samples\n            loss_dict.update(enc_loss_bbox=loss_l1)\n\n        return loss_dict\n\n    @torch.no_grad()\n    def _get_target_single(self, cls_preds, objectness, priors, decoded_bboxes,\n                    gt_bboxes, gt_labels, centers2d):\n        \"\"\"Compute classification, regression, and objectness targets for\n        priors in a single image.\n        Args:\n            cls_preds (Tensor): Classification predictions of one image,\n                a 2D-Tensor with shape [num_priors, num_classes]\n            objectness (Tensor): Objectness predictions of one image,\n                a 1D-Tensor with shape [num_priors]\n            priors (Tensor): All priors of one image, a 2D-Tensor with shape\n                [num_priors, 4] in [cx, xy, stride_w, stride_y] format.\n            decoded_bboxes (Tensor): Decoded bboxes predictions of one image,\n                a 2D-Tensor with shape [num_priors, 4] in [tl_x, tl_y,\n                br_x, br_y] format.\n            gt_bboxes (Tensor): Ground truth bboxes of one image, a 2D-Tensor\n                with shape [num_gts, 4] in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth labels of one image, a Tensor\n                with shape [num_gts].\n        \"\"\"\n\n        num_priors = priors.size(0)\n        num_gts = gt_labels.size(0)\n        gt_bboxes = gt_bboxes.to(decoded_bboxes.dtype)\n        centers2d = centers2d.to(decoded_bboxes.dtype)\n        # No target\n        if num_gts == 0:\n            cls_target = cls_preds.new_zeros((0, self.num_classes))\n            bbox_target = cls_preds.new_zeros((0, 4))\n            l1_target = cls_preds.new_zeros((0, 4))\n            obj_target = cls_preds.new_zeros((num_priors, 1))\n            foreground_mask = cls_preds.new_zeros(num_priors).bool()\n            centers2d_target = cls_preds.new_zeros((0, 2))\n            return (foreground_mask, cls_target, obj_target, bbox_target,\n                    l1_target, centers2d_target, 0)\n\n        # YOLOX uses center priors with 0.5 offset to assign targets,\n        # but use center priors without offset to regress bboxes.\n        offset_priors = torch.cat(\n            [priors[:, :2] + priors[:, 2:] * 0.5, priors[:, 2:]], dim=-1)\n\n        assign_result = self.assigner.assign(\n            cls_preds.sigmoid() * objectness.unsqueeze(1).sigmoid(),\n            offset_priors, decoded_bboxes, gt_bboxes, gt_labels)\n\n        sampling_result = self.sampler.sample(assign_result, priors, gt_bboxes)\n        sampling_result_centers2d = self.sampler_.sample(assign_result, priors, centers2d)\n        pos_inds = sampling_result.pos_inds\n        num_pos_per_img = pos_inds.size(0)\n\n        pos_ious = assign_result.max_overlaps[pos_inds]\n        # IOU aware classification score\n        cls_target = F.one_hot(sampling_result.pos_gt_labels,\n                               self.num_classes) * pos_ious.unsqueeze(-1)\n        obj_target = torch.zeros_like(objectness).unsqueeze(-1)\n        obj_target[pos_inds] = 1\n        bbox_target = sampling_result.pos_gt_bboxes\n        l1_target = cls_preds.new_zeros((num_pos_per_img, 4))\n        if self.use_l1:\n            l1_target = 
self._get_l1_target(l1_target, bbox_target, priors[pos_inds])\n        foreground_mask = torch.zeros_like(objectness).to(torch.bool)\n        foreground_mask[pos_inds] = 1\n\n        #centers2d target\n\n        centers2d_labels = sampling_result_centers2d.pos_gt_bboxes\n        centers2d_target = cls_preds.new_zeros((num_pos_per_img, 2))\n        centers2d_target = self._get_centers2d_target(centers2d_target, centers2d_labels, priors[pos_inds])\n        return (foreground_mask, cls_target, obj_target, bbox_target,\n                l1_target, centers2d_target, num_pos_per_img)\n\n    def _get_l1_target(self, l1_target, gt_bboxes, priors, eps=1e-8):\n        \"\"\"Convert gt bboxes to center offset and log width height.\"\"\"\n        gt_cxcywh = bbox_xyxy_to_cxcywh(gt_bboxes)\n        l1_target[:, :2] = (gt_cxcywh[:, :2] - priors[:, :2]) / priors[:, 2:]\n        l1_target[:, 2:] = torch.log(gt_cxcywh[:, 2:] / priors[:, 2:] + eps)\n        return l1_target\n    \n    def _get_centers2d_target(self, centers2d_target, centers2d_labels, priors):\n        centers2d_target = (centers2d_labels - priors[:, :2]) / priors[:, 2:]\n        return centers2d_target\n"
  },
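For reference, a minimal standalone sketch (not part of the repo) of the decode step implemented by `_bbox_decode` in the head above: priors are (cx, cy, stride_w, stride_h), raw predictions are (dx, dy, log_w, log_h), and the decoded box comes back as (tl_x, tl_y, br_x, br_y). The numbers are made up for illustration.

import torch

prior = torch.tensor([[16., 16., 8., 8.]])    # one prior: cx, cy, stride_w, stride_h
pred = torch.tensor([[0.5, -0.5, 0.0, 0.0]])  # hypothetical raw head output

xy = pred[..., :2] * prior[:, 2:] + prior[:, :2]   # decoded center -> (20., 12.)
wh = pred[..., 2:].exp() * prior[:, 2:]            # decoded size   -> (8., 8.)
box = torch.cat([xy - wh / 2, xy + wh / 2], dim=-1)
print(box)  # tensor([[16.,  8., 24., 16.]])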
  {
    "path": "mmdet3d/models/fbbev/modules/__init__.py",
    "content": "from .depth_net import NaiveDepthNet, CM_DepthNet\nfrom .frpn import FRPN\nfrom .fpn3d import FPN3D\nfrom .resnet3d import CustomResNet3D\nfrom .occ_loss_utils import *"
  },
  {
    "path": "mmdet3d/models/fbbev/modules/depth_net.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import build_conv_layer\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.cuda.amp.autocast_mode import autocast\nfrom torch.utils.checkpoint import checkpoint\nfrom mmdet.models.backbones.resnet import BasicBlock\nfrom mmdet.models import HEADS\nimport torch.utils.checkpoint as cp\nfrom mmdet3d.models import builder\nfrom mmcv.runner import force_fp32, auto_fp16\nimport torch\nfrom torchvision.utils import make_grid\nimport torchvision\nimport matplotlib.pyplot as plt\nimport cv2\n\ndef convert_color(img_path):\n    plt.figure()\n    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)\n    plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))\n    plt.close()\n\n\ndef save_tensor(tensor, path, pad_value=254.0,normalize=False):\n    print('save_tensor', path)\n    tensor = tensor.to(torch.float).detach().cpu()\n    max_ = tensor.flatten(1).max(-1).values[:, None, None]\n    min_ = tensor.flatten(1).min(-1).values[:, None, None]\n    tensor = (tensor-min_)/(max_-min_)\n    if tensor.type() == 'torch.BoolTensor':\n        tensor = tensor*255\n    if len(tensor.shape) == 3:\n        tensor = tensor.unsqueeze(1)\n    tensor = make_grid(tensor, pad_value=pad_value, normalize=normalize).permute(1, 2, 0).numpy().copy()\n    torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)\n    convert_color(path)\n\n\n@HEADS.register_module()\nclass NaiveDepthNet(BaseModule):\n    r\"\"\"Naive depthnet used in Lift-Splat-Shoot \n\n    Please refer to the `paper <https://arxiv.org/abs/2008.05711>`_\n\n    Args:\n        in_channels (int): Channels of input feature.\n        context_channels (int): Channels of transformed feature.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels=512,\n        context_channels=64,\n        depth_channels=118,\n        downsample=16,\n        uniform=False,\n        with_cp=False\n    ):\n        super(NaiveDepthNet, self).__init__()\n        self.uniform = uniform\n        self.with_cp = with_cp     \n        self.context_channels = context_channels\n        self.in_channels = in_channels\n        self.D =depth_channels\n        self.downsample=downsample,\n        self.depth_net = nn.Conv2d(\n            in_channels, self.D + self.context_channels, kernel_size=1, padding=0)\n    \n    @force_fp32()\n    def forward(self, x, mlp_input=None):\n        \"\"\"\n        \"\"\"\n       \n        B, N, C, H, W = x.shape\n        x = x.view(B * N, C, H, W)\n        if self.with_cp and x.requires_grad:\n            x = cp.checkpoint(self.depth_net, x)\n        else:\n            x = self.depth_net(x)            \n\n        depth_digit = x[:, :self.D, ...]\n        context = x[:, self.D:self.D + self.context_channels, ...]\n        if self.uniform:\n            depth_digit = depth_digit * 0\n            depth = depth_digit.softmax(dim=1)\n        else:\n            depth = depth_digit.softmax(dim=1)\n        context = context.view(B, N,  self.context_channels, H, W)\n        depth = depth.view(B, N,  self.D, H, W)\n        return context, depth\n\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        return None\n\n\n\nclass _ASPPModule(nn.Module):\n\n    def 
__init__(self, inplanes, planes, kernel_size, padding, dilation,\n                 BatchNorm):\n        super(_ASPPModule, self).__init__()\n        self.atrous_conv = nn.Conv2d(\n            inplanes,\n            planes,\n            kernel_size=kernel_size,\n            stride=1,\n            padding=padding,\n            dilation=dilation,\n            bias=False)\n        self.bn = BatchNorm(planes)\n        self.relu = nn.ReLU()\n\n        self._init_weight()\n    \n    @force_fp32()\n    def forward(self, x):\n        x = self.atrous_conv(x)\n        x = self.bn(x)\n\n        return self.relu(x)\n\n    def _init_weight(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                torch.nn.init.kaiming_normal_(m.weight)\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n\nclass ASPP(nn.Module):\n\n    def __init__(self, inplanes, mid_channels=256, BatchNorm=nn.BatchNorm2d):\n        super(ASPP, self).__init__()\n\n        dilations = [1, 6, 12, 18]\n\n        self.aspp1 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            1,\n            padding=0,\n            dilation=dilations[0],\n            BatchNorm=BatchNorm)\n        self.aspp2 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            3,\n            padding=dilations[1],\n            dilation=dilations[1],\n            BatchNorm=BatchNorm)\n        self.aspp3 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            3,\n            padding=dilations[2],\n            dilation=dilations[2],\n            BatchNorm=BatchNorm)\n        self.aspp4 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            3,\n            padding=dilations[3],\n            dilation=dilations[3],\n            BatchNorm=BatchNorm)\n\n        self.global_avg_pool = nn.Sequential(\n            nn.AdaptiveAvgPool2d((1, 1)),\n            nn.Conv2d(inplanes, mid_channels, 1, stride=1, bias=False),\n            BatchNorm(mid_channels),\n            nn.ReLU(),\n        )\n        self.conv1 = nn.Conv2d(\n            int(mid_channels * 5), inplanes, 1, bias=False)\n        self.bn1 = BatchNorm(inplanes)\n        self.relu = nn.ReLU()\n        self.dropout = nn.Dropout(0.5)\n        self._init_weight()\n    \n    @force_fp32()\n    def forward(self, x):\n        x1 = self.aspp1(x)\n        x2 = self.aspp2(x)\n        x3 = self.aspp3(x)\n        x4 = self.aspp4(x)\n        x5 = self.global_avg_pool(x)\n        x5 = F.interpolate(\n            x5, size=x4.size()[2:], mode='bilinear', align_corners=True)\n        x = torch.cat((x1, x2, x3, x4, x5), dim=1)\n\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n\n        return self.dropout(x)\n\n    def _init_weight(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                torch.nn.init.kaiming_normal_(m.weight)\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n\nclass Mlp(nn.Module):\n\n    def __init__(self,\n                 in_features,\n                 hidden_features=None,\n                 out_features=None,\n                 act_layer=nn.ReLU,\n                 drop=0.0):\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n        self.fc1 = nn.Linear(in_features, 
hidden_features)\n        self.act = act_layer()\n        self.drop1 = nn.Dropout(drop)\n        self.fc2 = nn.Linear(hidden_features, out_features)\n        self.drop2 = nn.Dropout(drop)\n    \n    @force_fp32()\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.act(x)\n        x = self.drop1(x)\n        x = self.fc2(x)\n        x = self.drop2(x)\n        return x\n\n\nclass SELayer(nn.Module):\n\n    def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):\n        super().__init__()\n        self.conv_reduce = nn.Conv2d(channels, channels, 1, bias=True)\n        self.act1 = act_layer()\n        self.conv_expand = nn.Conv2d(channels, channels, 1, bias=True)\n        self.gate = gate_layer()\n    \n    @force_fp32()\n    def forward(self, x, x_se):\n        x_se = self.conv_reduce(x_se)\n        x_se = self.act1(x_se)\n        x_se = self.conv_expand(x_se)\n        return x * self.gate(x_se)\n\n\n@HEADS.register_module()\nclass CM_DepthNet(BaseModule):\n    \"\"\"\n        Camera parameters aware depth net\n    \"\"\"\n    def __init__(self,\n                 in_channels=512,\n                 context_channels=64,\n                 depth_channels=118,\n                 mid_channels=512,\n                 use_dcn=True,\n                 downsample=16,\n                 grid_config=None,\n                 loss_depth_weight=3.0,\n                 with_cp=False,\n                 se_depth_map=False,\n                 sid=False,\n                 bias=0.0,\n                 input_size=None,\n                 aspp_mid_channels=-1,\n                 use_aspp=True):\n        super(CM_DepthNet, self).__init__()\n        self.fp16_enable=False\n        self.sid=sid\n        self.with_cp = with_cp\n        self.downsample = downsample\n        self.grid_config = grid_config\n        self.loss_depth_weight = loss_depth_weight\n        self.reduce_conv = nn.Sequential(\n            nn.Conv2d(\n                in_channels, mid_channels, kernel_size=3, stride=1, padding=1),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n        )\n        self.context_channels = context_channels\n        self.depth_channels = depth_channels\n        self.se_depth_map = se_depth_map\n        self.context_conv = nn.Conv2d(\n            mid_channels, context_channels, kernel_size=1, stride=1, padding=0)\n        self.bn = nn.BatchNorm1d(27)\n        self.depth_mlp = Mlp(27, mid_channels, mid_channels)\n        self.depth_se = SELayer(mid_channels)  # NOTE: add camera-aware\n        self.context_mlp = Mlp(27, mid_channels, mid_channels)\n        self.context_se = SELayer(mid_channels)  # NOTE: add camera-aware\n        depth_conv_input_channels = mid_channels\n        downsample = None\n\n        depth_conv_list = [\n           BasicBlock(depth_conv_input_channels, mid_channels,\n                                      downsample=downsample),\n            BasicBlock(mid_channels, mid_channels),\n            BasicBlock(mid_channels, mid_channels),\n        ]\n        if use_aspp:\n            if aspp_mid_channels < 0:\n                aspp_mid_channels = mid_channels\n            depth_conv_list.append(ASPP(mid_channels, aspp_mid_channels))\n        if use_dcn:\n            depth_conv_list.append(\n                build_conv_layer(\n                    cfg=dict(\n                        type='DCN',\n                        in_channels=mid_channels,\n                        out_channels=mid_channels,\n                        kernel_size=3,\n                      
  padding=1,\n                        groups=4,\n                        im2col_step=128,\n                    )))\n        depth_conv_list.append(\n            nn.Conv2d(\n                mid_channels,\n                depth_channels,\n                kernel_size=1,\n                stride=1,\n                padding=0))\n        self.depth_conv = nn.Sequential(*depth_conv_list)\n\n\n  \n    @force_fp32()\n    def forward(self, x, mlp_input):\n\n        # if not  x.requires_grad: \n        x = x.to(torch.float32) # FIX distill type error\n        mlp_input = self.bn(mlp_input.reshape(-1, mlp_input.shape[-1]))\n        B, N, C, H, W = x.shape\n        x = x.view(B * N, C, H, W)\n        if self.with_cp and x.requires_grad:\n            x = cp.checkpoint(self.reduce_conv, x)\n        else:\n            x = self.reduce_conv(x)\n        context_se = self.context_mlp(mlp_input)[..., None, None]\n        if self.with_cp and x.requires_grad:\n            context = cp.checkpoint(self.context_se, x, context_se)\n        else:\n            context = self.context_se(x, context_se)\n        context = self.context_conv(context)\n        depth_se = self.depth_mlp(mlp_input)[..., None, None]\n        depth = self.depth_se(x, depth_se)\n\n\n            \n        if self.with_cp and depth.requires_grad:\n            depth = cp.checkpoint(self.depth_conv, depth)\n        else:\n            depth = self.depth_conv(depth)\n        depth = depth.softmax(dim=1)\n        context = context.view(B, N,  self.context_channels, H, W)\n        depth = depth.view(B, N, self.depth_channels, H, W)\n\n        return context, depth\n\n\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        B, N, _, _ = rot.shape\n        bda = bda.view(B, 1, 3, 3).repeat(1, N, 1, 1)\n        mlp_input = torch.stack([\n            intrin[:, :, 0, 0],\n            intrin[:, :, 1, 1],\n            intrin[:, :, 0, 2],\n            intrin[:, :, 1, 2],\n            post_rot[:, :, 0, 0],\n            post_rot[:, :, 0, 1],\n            post_tran[:, :, 0],\n            post_rot[:, :, 1, 0],\n            post_rot[:, :, 1, 1],\n            post_tran[:, :, 1],\n            bda[:, :, 0, 0],\n            bda[:, :, 0, 1],\n            bda[:, :, 1, 0],\n            bda[:, :, 1, 1],\n            bda[:, :, 2, 2],\n        ],\n                                dim=-1)\n        sensor2ego = torch.cat([rot, tran.reshape(B, N, 3, 1)],\n                               dim=-1).reshape(B, N, -1)\n        mlp_input = torch.cat([mlp_input, sensor2ego], dim=-1)\n        return mlp_input\n\n\n    def get_downsampled_gt_depth(self, gt_depths):\n        \"\"\"\n        Input:\n            gt_depths: [B, N, H, W]\n        Output:\n            gt_depths: [B*N*h*w, d]\n        \"\"\"\n        downsample = self.downsample\n        # if self.downsample == 8 and self.se_depth_map:\n        #    downsample = 16 \n        B, N, H, W = gt_depths.shape\n        gt_depths = gt_depths.view(B * N, H // downsample,\n                                   downsample, W // downsample,\n                                   downsample, 1)\n        gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous()\n        gt_depths = gt_depths.view(-1, downsample * downsample)\n        gt_depths_tmp = torch.where(gt_depths == 0.0,\n                                    1e5 * torch.ones_like(gt_depths),\n                                    gt_depths)\n        gt_depths = torch.min(gt_depths_tmp, dim=-1).values\n        gt_depths = gt_depths.view(B * N, H // downsample,\n      
                             W // downsample)\n        if not self.sid:\n            gt_depths = (gt_depths - (self.grid_config['depth'][0] -\n                                      self.grid_config['depth'][2])) / \\\n                        self.grid_config['depth'][2]\n        else:\n            gt_depths = torch.log(gt_depths) - torch.log(\n                torch.tensor(self.grid_config['depth'][0]).float())\n            gt_depths = gt_depths * (self.D - 1) / torch.log(\n                torch.tensor(self.grid_config['depth'][1] - 1.).float() /\n                self.grid_config['depth'][0])\n            gt_depths = gt_depths + 1.\n        gt_depths = torch.where((gt_depths < self.depth_channels + 1) & (gt_depths >= 0.0),\n                                gt_depths, torch.zeros_like(gt_depths))\n        gt_depths = F.one_hot(\n            gt_depths.long(), num_classes=self.depth_channels + 1).view(-1, self.depth_channels + 1)[:,\n                                                                           1:]\n        return gt_depths.float()\n\n    @force_fp32()\n    def get_depth_loss(self, depth_labels, depth_preds):\n        depth_labels = self.get_downsampled_gt_depth(depth_labels)\n        depth_preds = depth_preds.permute(0, 1, 3, 4,\n                                          2).contiguous().view(-1, self.depth_channels)\n        fg_mask = torch.max(depth_labels, dim=1).values > 0.0\n        depth_labels = depth_labels[fg_mask]\n        depth_preds = depth_preds[fg_mask]\n        with autocast(enabled=False):\n            depth_loss = F.binary_cross_entropy(\n                depth_preds,\n                depth_labels,\n                reduction='none',\n            ).sum() / max(1.0, fg_mask.sum())\n        return dict(loss_depth=self.loss_depth_weight * depth_loss)\n\n\n\n\n@HEADS.register_module()\nclass CM_ContextNet(nn.Module):\n    \"\"\"\n        Camera parameters aware depth net\n    \"\"\"\n    def __init__(self,\n                 in_channels=512,\n                 context_channels=64,\n                 mid_channels=512,\n                 with_cp=False,\n                 ):\n        super(CM_ContextNet, self).__init__()\n        self.with_cp = with_cp\n        self.reduce_conv = nn.Sequential(\n            nn.Conv2d(\n                in_channels, mid_channels, kernel_size=3, stride=1, padding=1),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n        )\n        self.context_channels = context_channels\n        self.context_conv = nn.Conv2d(\n            mid_channels, context_channels, kernel_size=1, stride=1, padding=0)\n        self.bn = nn.BatchNorm1d(27)\n        self.context_mlp = Mlp(27, mid_channels, mid_channels)\n        self.context_se = SELayer(mid_channels)  # NOTE: add camera-aware\n\n    \n    @force_fp32()\n    def forward(self, x, mlp_input):\n        mlp_input = self.bn(mlp_input.reshape(-1, mlp_input.shape[-1]))\n        B, N, C, H, W = x.shape\n        x = x.view(B * N, C, H, W)\n        if self.with_cp and x.requires_grad:\n            x = cp.checkpoint(self.reduce_conv, x)\n        else:\n            x = self.reduce_conv(x)\n        context_se = self.context_mlp(mlp_input)[..., None, None]\n        if self.with_cp and x.requires_grad:\n            context = cp.checkpoint(self.context_se, x, context_se)\n        else:\n            context = self.context_se(x, context_se)\n        context = self.context_conv(context)\n        context = context.view(B, N,  self.context_channels, H, W)\n        return context\n"
  },
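A minimal sketch of the depth-bin labeling done by `CM_DepthNet.get_downsampled_gt_depth` above (sid=False branch). The `grid_config['depth']` values below are assumptions, since they live in the training configs rather than in this file; with [1.0, 60.0, 0.5] they yield the default 118 depth channels used above.

import torch

d_min, d_max, d_step = 1.0, 60.0, 0.5            # assumed grid_config['depth']
depth_channels = int((d_max - d_min) / d_step)   # 118, matching the default above

gt_depth = torch.tensor([10.0])                  # metric depth of one pixel, in meters
bin_idx = (gt_depth - (d_min - d_step)) / d_step
print(depth_channels, bin_idx)  # 118 tensor([19.]); .long() + one_hot(depth_channels + 1)[:, 1:] follows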
  {
    "path": "mmdet3d/models/fbbev/modules/fpn3d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer\nfrom mmcv.runner import BaseModule, auto_fp16\nfrom torch import nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmdet.models import NECKS\n\nimport torch.nn.functional as F\nimport pdb\nfrom mmcv.runner import BaseModule, force_fp32\n\n@NECKS.register_module()\nclass FPN3D(BaseModule):\n    \"\"\"FPN used in SECOND/PointPillars/PartA2/MVXNet.\n\n    Args:\n        in_channels (list[int]): Input channels of multi-scale feature maps.\n        out_channels (list[int]): Output channels of feature maps.\n        upsample_strides (list[int]): Strides used to upsample the\n            feature maps.\n        norm_cfg (dict): Config dict of normalization layers.\n        upsample_cfg (dict): Config dict of upsample layers.\n        conv_cfg (dict): Config dict of conv layers.\n        use_conv_for_no_stride (bool): Whether to use conv when stride is 1.\n    \"\"\"\n    def __init__(self,\n                 in_channels=[80, 160, 320, 640],\n                 out_channels=256,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 conv_cfg=dict(type='Conv3d'),\n                 act_cfg=dict(type='ReLU'),\n                 with_cp=False,\n                 upsample_cfg=dict(mode='trilinear'),\n                 init_cfg=None):\n        super(FPN3D, self).__init__(init_cfg=init_cfg)\n        \n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.fp16_enabled = False\n        self.upsample_cfg = upsample_cfg\n        self.with_cp = with_cp\n        \n        self.num_out = len(self.in_channels)\n        self.lateral_convs = nn.ModuleList()\n        self.fpn_convs = nn.ModuleList()\n        \n        for i in range(self.num_out):\n            \n            l_conv = nn.Sequential(\n                ConvModule(in_channels[i], out_channels, \n                    kernel_size=1, padding=0,\n                    conv_cfg=conv_cfg, norm_cfg=norm_cfg, \n                    act_cfg=act_cfg, bias=False, \n                    inplace=True),\n            )\n            \n            fpn_conv = nn.Sequential(\n                ConvModule(out_channels, out_channels, \n                    kernel_size=3, padding=1,\n                    conv_cfg=conv_cfg, norm_cfg=norm_cfg, \n                    act_cfg=act_cfg, bias=False, \n                    inplace=True),\n            )\n\n            self.lateral_convs.append(l_conv)\n            self.fpn_convs.append(fpn_conv)\n\n    @force_fp32()\n    def forward(self, inputs):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): 4D Tensor in (N, C, H, W) shape.\n\n        Returns:\n            list[torch.Tensor]: Multi-level feature maps.\n        \"\"\"\n\n        assert len(inputs) == len(self.in_channels)\n\n        # build laterals\n        laterals = []\n        for i, lateral_conv in enumerate(self.lateral_convs):\n            if self.with_cp:\n                lateral_i = torch.utils.checkpoint.checkpoint(lateral_conv, inputs[i])\n            else:\n                lateral_i = lateral_conv(inputs[i])\n            laterals.append(lateral_i)\n\n        # build down-top path\n        for i in range(self.num_out - 1, 0, -1):\n            prev_shape = laterals[i - 1].shape[2:]\n            laterals[i - 1] = laterals[i - 1] + F.interpolate(laterals[i], \n                    size=prev_shape, 
align_corners=False, **self.upsample_cfg)\n\n        outs = []\n        for i, fpn_conv in enumerate(self.fpn_convs):\n            if self.with_cp:\n                out_i = torch.utils.checkpoint.checkpoint(fpn_conv, laterals[i])\n            else:\n                out_i = fpn_conv(laterals[i])\n            outs.append(out_i)\n\n        return outs\n"
  },
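The fusion pattern used by FPN3D above, shown as a simplified stand-in with plain torch.nn layers instead of mmcv's ConvModule (illustration only; channel counts and shapes are made up).

import torch
import torch.nn as nn
import torch.nn.functional as F

out_channels = 64
lateral = nn.ModuleList([nn.Conv3d(c, out_channels, 1, bias=False) for c in (80, 160)])
fpn = nn.ModuleList([nn.Conv3d(out_channels, out_channels, 3, padding=1, bias=False) for _ in range(2)])

feats = [torch.randn(1, 80, 8, 40, 40), torch.randn(1, 160, 4, 20, 20)]
laterals = [conv(x) for conv, x in zip(lateral, feats)]
# upsample the deeper level (trilinear) and add it to the shallower one
laterals[0] = laterals[0] + F.interpolate(laterals[1], size=laterals[0].shape[2:],
                                          mode='trilinear', align_corners=False)
outs = [conv(x) for conv, x in zip(fpn, laterals)]
print([o.shape for o in outs])  # both levels end up with 64 channels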
  {
    "path": "mmdet3d/models/fbbev/modules/frpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import build_conv_layer\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.cuda.amp.autocast_mode import autocast\nfrom torch.utils.checkpoint import checkpoint\nfrom mmdet.models.backbones.resnet import BasicBlock\nfrom mmdet.models import HEADS\nimport torch.utils.checkpoint as cp\nfrom mmdet3d.models.builder import build_loss\n\n\n@HEADS.register_module()\nclass FRPN(BaseModule):\n    r\"\"\"\n    Args:\n        in_channels (int): Channels of input feature.\n        context_channels (int): Channels of transformed feature.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels=512,\n        scale_factor=1,\n        mask_thre = 0.4,\n    ):\n        super(FRPN, self).__init__()\n        self.mask_net = nn.Sequential(\n            nn.Conv2d(in_channels, in_channels//2, kernel_size=3, padding=1, stride=1),\n            nn.BatchNorm2d(in_channels//2),\n            nn.ReLU(),\n            nn.Conv2d(in_channels//2, 1, kernel_size=3, padding=1, stride=1),\n            )\n        self.upsample = nn.Upsample(scale_factor = scale_factor , mode ='bilinear',align_corners = True)\n        self.dice_loss = build_loss(dict(type='CustomDiceLoss', use_sigmoid=True, loss_weight=1.))\n        self.ce_loss = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.13]))  # From lss\n        self.mask_thre = mask_thre\n\n    def forward(self, input):\n        \"\"\"\n        \"\"\"\n        bev_mask = self.mask_net(input)            \n        bev_mask = self.upsample(bev_mask)\n        return bev_mask\n    \n    \n    def get_bev_mask_loss(self, gt_bev_mask, pred_bev_mask):\n        bs, bev_h, bev_w = gt_bev_mask.shape\n        b = gt_bev_mask.reshape(bs , bev_w * bev_h).permute(1, 0).to(torch.float)\n        a = pred_bev_mask.reshape(bs, bev_w * bev_h).permute(1, 0)\n        mask_ce_loss = self.ce_loss(a, b)\n        mask_dice_loss = self.dice_loss(pred_bev_mask.reshape(bs, -1), gt_bev_mask.reshape(bs, -1))\n        return dict(mask_ce_loss=mask_ce_loss, mask_dice_loss=mask_dice_loss)\n"
  },
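A toy illustration of the two terms combined in `FRPN.get_bev_mask_loss` above: a positively weighted BCE on the mask logits plus a dice-style overlap term. The soft-dice below is a generic stand-in, since `CustomDiceLoss` is defined elsewhere in the repo and may differ in detail; all tensors are random placeholders.

import torch

pred_logits = torch.randn(2, 200 * 200)              # hypothetical flattened BEV mask logits
gt_mask = (torch.rand(2, 200 * 200) > 0.7).float()   # hypothetical binary BEV ground truth

ce = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.13]))(pred_logits, gt_mask)
prob = pred_logits.sigmoid()
dice = 1 - (2 * (prob * gt_mask).sum(1) + 1) / (prob.sum(1) + gt_mask.sum(1) + 1)
print(ce.item(), dice.mean().item())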
  {
    "path": "mmdet3d/models/fbbev/modules/occ_loss_utils/__init__.py",
    "content": "\nfrom .lovasz_softmax  import *\nfrom .nusc_param import *\nfrom .semkitti import *\nfrom .focal_loss import CustomFocalLoss"
  },
  {
    "path": "mmdet3d/models/fbbev/modules/occ_loss_utils/focal_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss\n\nfrom mmdet.models.builder import LOSSES\nfrom mmdet.models.losses.utils import weight_reduce_loss\nimport numpy as np\n\n# This method is only for debugging\ndef py_sigmoid_focal_loss(pred,\n                          target,\n                          weight=None,\n                          gamma=2.0,\n                          alpha=0.25,\n                          reduction='mean',\n                          avg_factor=None):\n    \"\"\"PyTorch version of `Focal Loss <https://arxiv.org/abs/1708.02002>`_.\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the\n            number of classes\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    pred_sigmoid = pred.sigmoid()\n    target = target.type_as(pred)\n    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)\n    focal_weight = (alpha * target + (1 - alpha) *\n                    (1 - target)) * pt.pow(gamma)\n    loss = F.binary_cross_entropy_with_logits(\n        pred, target, reduction='none') * focal_weight\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n        loss = loss * weight\n\n    loss = loss.sum(-1).mean()\n    # loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\ndef py_focal_loss_with_prob(pred,\n                            target,\n                            weight=None,\n                            gamma=2.0,\n                            alpha=0.25,\n                            reduction='mean',\n                            avg_factor=None):\n    \"\"\"PyTorch version of `Focal Loss <https://arxiv.org/abs/1708.02002>`_.\n    Different from `py_sigmoid_focal_loss`, this function accepts probability\n    as input.\n    Args:\n        pred (torch.Tensor): The prediction probability with shape (N, C),\n            C is the number of classes.\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. 
Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    num_classes = pred.size(1)\n    target = F.one_hot(target, num_classes=num_classes + 1)\n    target = target[:, :num_classes]\n\n    target = target.type_as(pred)\n    pt = (1 - pred) * target + pred * (1 - target)\n    focal_weight = (alpha * target + (1 - alpha) *\n                    (1 - target)) * pt.pow(gamma)\n    loss = F.binary_cross_entropy(\n        pred, target, reduction='none') * focal_weight\n\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\ndef sigmoid_focal_loss(pred,\n                       target,\n                       weight=None,\n                       gamma=2.0,\n                       alpha=0.25,\n                       reduction='mean',\n                       avg_factor=None):\n    r\"\"\"A wrapper of cuda version `Focal Loss\n    <https://arxiv.org/abs/1708.02002>`_.\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the number\n            of classes.\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'. Options are \"none\", \"mean\" and \"sum\".\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    # Function.apply does not accept keyword arguments, so the decorator\n    # \"weighted_loss\" is not applicable\n    loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), gamma,\n                               alpha, None, 'none')\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. 
But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n        loss = loss * weight\n    loss = loss.sum(-1).mean()\n    # loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\n@LOSSES.register_module()\nclass CustomFocalLoss(nn.Module):\n\n    def __init__(self,\n                 use_sigmoid=True,\n                 gamma=2.0,\n                 alpha=0.25,\n                 reduction='mean',\n                 loss_weight=100.0,\n                 activated=False):\n        \"\"\"`Focal Loss <https://arxiv.org/abs/1708.02002>`_\n        Args:\n            use_sigmoid (bool, optional): Whether to the prediction is\n                used for sigmoid or softmax. Defaults to True.\n            gamma (float, optional): The gamma for calculating the modulating\n                factor. Defaults to 2.0.\n            alpha (float, optional): A balanced form for Focal Loss.\n                Defaults to 0.25.\n            reduction (str, optional): The method used to reduce the loss into\n                a scalar. Defaults to 'mean'. Options are \"none\", \"mean\" and\n                \"sum\".\n            loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n            activated (bool, optional): Whether the input is activated.\n                If True, it means the input has been activated and can be\n                treated as probabilities. Else, it should be treated as logits.\n                Defaults to False.\n        \"\"\"\n        super(CustomFocalLoss, self).__init__()\n        assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'\n        self.use_sigmoid = use_sigmoid\n        self.gamma = gamma\n        self.alpha = alpha\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.activated = activated\n        H, W = 200, 200\n\n        xy, yx = torch.meshgrid([torch.arange(H)-H/2,  torch.arange(W)-W/2])\n        c = torch.stack([xy,yx], 2)\n        c = torch.norm(c, 2, -1)\n        c_max = c.max()\n        self.c = (c/c_max + 1).cuda()\n  \n        \n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                ignore_index=255,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning label of the prediction.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Options are \"none\", \"mean\" and \"sum\".\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n        B, H, W, D = target.shape\n\n        c = self.c[None, :, :, None].repeat(B, 1, 1, D).reshape(-1)\n\n        visible_mask = (target!=ignore_index).reshape(-1).nonzero().squeeze(-1)\n        weight_mask = weight[None,:] * c[visible_mask, None]\n        # visible_mask[:, None]\n\n        num_classes = pred.size(1)\n        pred = pred.permute(0, 2, 3, 4, 1).reshape(-1, num_classes)[visible_mask]\n        target = target.reshape(-1)[visible_mask]\n        \n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if self.use_sigmoid:\n            if self.activated:\n                calculate_loss_func = py_focal_loss_with_prob\n            else:\n                if torch.cuda.is_available() and pred.is_cuda:\n                    calculate_loss_func = sigmoid_focal_loss\n                else:\n                    num_classes = pred.size(1)\n                    target = F.one_hot(target, num_classes=num_classes + 1)\n                    target = target[:, :num_classes]\n                    calculate_loss_func = py_sigmoid_focal_loss\n\n\n            loss_cls = self.loss_weight * calculate_loss_func(\n                pred,\n                target.to(torch.long),\n                weight_mask,\n                gamma=self.gamma,\n                alpha=self.alpha,\n                reduction=reduction,\n                avg_factor=avg_factor)\n\n        else:\n            raise NotImplementedError\n        return loss_cls\n\n\n\n@LOSSES.register_module()\nclass CustomMSELoss(nn.Module):\n    \"\"\"MSELoss.\n    Args:\n        reduction (str, optional): The method that reduces the loss to a\n            scalar. Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of the loss. Defaults to 1.0\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super().__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                mask=None,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function of loss.\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            weight (torch.Tensor, optional): Weight of the loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        # N, C, H, W = pred.shape\n        # mask = mask[:, None, :, :].repeat(1, C, 1, 1).to(torch.float)\n        loss = self.loss_weight * (F.mse_loss(pred, target, reduction='mean'))\n        return loss"
  },
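A standalone numerical check of the focal weighting used by `py_sigmoid_focal_loss` above (gamma=2.0, alpha=0.25, weight=None); the logits and targets are made up.

import torch
import torch.nn.functional as F

pred = torch.tensor([[2.0, -1.0]])    # logits for 2 classes, 1 sample (made up)
target = torch.tensor([[1.0, 0.0]])   # binary targets

p = pred.sigmoid()
pt = (1 - p) * target + p * (1 - target)
focal_w = (0.25 * target + 0.75 * (1 - target)) * pt.pow(2.0)
loss = F.binary_cross_entropy_with_logits(pred, target, reduction='none') * focal_w
print(loss.sum(-1).mean())  # same reduction as the weight=None path above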
  {
    "path": "mmdet3d/models/fbbev/modules/occ_loss_utils/lovasz_softmax.py",
    "content": "# -*- coding:utf-8 -*-\n# author: Xinge\n\n\n\"\"\"\nLovasz-Softmax and Jaccard hinge loss in PyTorch\nMaxim Berman 2018 ESAT-PSI KU Leuven (MIT License)\n\"\"\"\n\nfrom __future__ import print_function, division\n\nimport torch\nfrom torch.autograd import Variable\nimport torch.nn.functional as F\nimport numpy as np\ntry:\n    from itertools import  ifilterfalse\nexcept ImportError: # py3k\n    from itertools import  filterfalse as ifilterfalse\nfrom torch.cuda.amp import autocast\n\ndef lovasz_grad(gt_sorted):\n    \"\"\"\n    Computes gradient of the Lovasz extension w.r.t sorted errors\n    See Alg. 1 in paper\n    \"\"\"\n    p = len(gt_sorted)\n    gts = gt_sorted.sum()\n    intersection = gts - gt_sorted.float().cumsum(0)\n    union = gts + (1 - gt_sorted).float().cumsum(0)\n    jaccard = 1. - intersection / union\n    if p > 1: # cover 1-pixel case\n        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]\n    return jaccard\n\n\ndef iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True):\n    \"\"\"\n    IoU for foreground class\n    binary: 1 foreground, 0 background\n    \"\"\"\n    if not per_image:\n        preds, labels = (preds,), (labels,)\n    ious = []\n    for pred, label in zip(preds, labels):\n        intersection = ((label == 1) & (pred == 1)).sum()\n        union = ((label == 1) | ((pred == 1) & (label != ignore))).sum()\n        if not union:\n            iou = EMPTY\n        else:\n            iou = float(intersection) / float(union)\n        ious.append(iou)\n    iou = mean(ious)    # mean accross images if per_image\n    return 100 * iou\n\n\ndef iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False):\n    \"\"\"\n    Array of IoU for each (non ignored) class\n    \"\"\"\n    if not per_image:\n        preds, labels = (preds,), (labels,)\n    ious = []\n    for pred, label in zip(preds, labels):\n        iou = []    \n        for i in range(C):\n            if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes)\n                intersection = ((label == i) & (pred == i)).sum()\n                union = ((label == i) | ((pred == i) & (label != ignore))).sum()\n                if not union:\n                    iou.append(EMPTY)\n                else:\n                    iou.append(float(intersection) / float(union))\n        ious.append(iou)\n    ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image\n    return 100 * np.array(ious)\n\n\n# --------------------------- BINARY LOSSES ---------------------------\n\n\ndef lovasz_hinge(logits, labels, per_image=True, ignore=None):\n    \"\"\"\n    Binary Lovasz hinge loss\n      logits: [B, H, W] Variable, logits at each pixel (between -\\infty and +\\infty)\n      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)\n      per_image: compute the loss per image instead of per batch\n      ignore: void class id\n    \"\"\"\n    if per_image:\n        loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore))\n                          for log, lab in zip(logits, labels))\n    else:\n        loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))\n    return loss\n\n\ndef lovasz_hinge_flat(logits, labels):\n    \"\"\"\n    Binary Lovasz hinge loss\n      logits: [P] Variable, logits at each prediction (between -\\infty and +\\infty)\n      labels: [P] Tensor, binary ground truth labels (0 or 1)\n      ignore: label to ignore\n    \"\"\"\n    if len(labels) == 0:\n        # 
only void pixels, the gradients should be 0\n        return logits.sum() * 0.\n    signs = 2. * labels.float() - 1.\n    errors = (1. - logits * Variable(signs))\n    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)\n    perm = perm.data\n    gt_sorted = labels[perm]\n    grad = lovasz_grad(gt_sorted)\n    loss = torch.dot(F.relu(errors_sorted), Variable(grad))\n    return loss\n\n\ndef flatten_binary_scores(scores, labels, ignore=None):\n    \"\"\"\n    Flattens predictions in the batch (binary case)\n    Remove labels equal to 'ignore'\n    \"\"\"\n    scores = scores.view(-1)\n    labels = labels.view(-1)\n    if ignore is None:\n        return scores, labels\n    valid = (labels != ignore)\n    vscores = scores[valid]\n    vlabels = labels[valid]\n    return vscores, vlabels\n\n\nclass StableBCELoss(torch.nn.modules.Module):\n    def __init__(self):\n         super(StableBCELoss, self).__init__()\n    def forward(self, input, target):\n         neg_abs = - input.abs()\n         loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()\n         return loss.mean()\n\n\ndef binary_xloss(logits, labels, ignore=None):\n    \"\"\"\n    Binary Cross entropy loss\n      logits: [B, H, W] Variable, logits at each pixel (between -\\infty and +\\infty)\n      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)\n      ignore: void class id\n    \"\"\"\n    logits, labels = flatten_binary_scores(logits, labels, ignore)\n    loss = StableBCELoss()(logits, Variable(labels.float()))\n    return loss\n\n\n# --------------------------- MULTICLASS LOSSES ---------------------------\n\n\ndef lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=None):\n    \"\"\"\n    Multi-class Lovasz-Softmax loss\n      probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).\n              Interpreted as binary (sigmoid) output with outputs of size [B, H, W].\n      labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)\n      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.\n      per_image: compute the loss per image instead of per batch\n      ignore: void class labels\n    \"\"\"\n    if per_image:\n        loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes)\n                          for prob, lab in zip(probas, labels))\n    else:\n        with autocast(False):\n            loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes)\n    return loss\n\n\ndef lovasz_softmax_flat(probas, labels, classes='present'):\n    \"\"\"\n    Multi-class Lovasz-Softmax loss\n      probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)\n      labels: [P] Tensor, ground truth labels (between 0 and C - 1)\n      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.\n    \"\"\"\n    if probas.numel() == 0:\n        # only void pixels, the gradients should be 0\n        return probas * 0.\n    C = probas.size(1)\n    losses = []\n    class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes\n    for c in class_to_sum:\n        fg = (labels == c).float() # foreground for class c\n        if (classes is 'present' and fg.sum() == 0):\n            continue\n        if C == 1:\n            if len(classes) > 1:\n                raise ValueError('Sigmoid output possible only with 1 class')\n            class_pred = 
probas[:, 0]\n        else:\n            class_pred = probas[:, c]\n        errors = (Variable(fg) - class_pred).abs()\n        errors_sorted, perm = torch.sort(errors, 0, descending=True)\n        perm = perm.data\n        fg_sorted = fg[perm]\n        losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))\n    return mean(losses)\n\n\ndef flatten_probas(probas, labels, ignore=None):\n    \"\"\"\n    Flattens predictions in the batch\n    \"\"\"\n    if probas.dim() == 2:\n        if ignore is not None:\n            valid = (labels != ignore)\n            probas = probas[valid]\n            labels = labels[valid]\n        return probas, labels\n\n    elif probas.dim() == 3:\n        # assumes output of a sigmoid layer\n        B, H, W = probas.size()\n        probas = probas.view(B, 1, H, W)\n    elif probas.dim() == 5:\n        #3D segmentation\n        B, C, L, H, W = probas.size()\n        probas = probas.contiguous().view(B, C, L, H*W)\n    B, C, H, W = probas.size()\n    probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C\n    labels = labels.view(-1)\n    if ignore is None:\n        return probas, labels\n    valid = (labels != ignore)\n    vprobas = probas[valid.nonzero().squeeze()]\n    vlabels = labels[valid]\n    return vprobas, vlabels\n\ndef xloss(logits, labels, ignore=None):\n    \"\"\"\n    Cross entropy loss\n    \"\"\"\n    return F.cross_entropy(logits, Variable(labels), ignore_index=255)\n\ndef jaccard_loss(probas, labels,ignore=None, smooth = 100, bk_class = None):\n    \"\"\"\n    Something wrong with this loss\n    Multi-class Lovasz-Softmax loss\n      probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).\n              Interpreted as binary (sigmoid) output with outputs of size [B, H, W].\n      labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)\n      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.\n      per_image: compute the loss per image instead of per batch\n      ignore: void class labels\n    \"\"\"\n    vprobas, vlabels = flatten_probas(probas, labels, ignore)\n    \n    \n    true_1_hot = torch.eye(vprobas.shape[1])[vlabels]\n    \n    if bk_class:\n        one_hot_assignment = torch.ones_like(vlabels)\n        one_hot_assignment[vlabels == bk_class] = 0\n        one_hot_assignment = one_hot_assignment.float().unsqueeze(1)\n        true_1_hot = true_1_hot*one_hot_assignment\n    \n    true_1_hot = true_1_hot.to(vprobas.device)\n    intersection = torch.sum(vprobas * true_1_hot)\n    cardinality = torch.sum(vprobas + true_1_hot)\n    loss = (intersection + smooth / (cardinality - intersection + smooth)).mean()\n    return (1-loss)*smooth\n\ndef hinge_jaccard_loss(probas, labels,ignore=None, classes = 'present', hinge = 0.1, smooth =100):\n    \"\"\"\n    Multi-class Hinge Jaccard loss\n      probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).\n              Interpreted as binary (sigmoid) output with outputs of size [B, H, W].\n      labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)\n      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.\n      ignore: void class labels\n    \"\"\"\n    vprobas, vlabels = flatten_probas(probas, labels, ignore)\n    C = vprobas.size(1)\n    losses = []\n    class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes\n    for c in class_to_sum:\n        if 
c in vlabels:\n            c_sample_ind = vlabels == c\n            cprobas = vprobas[c_sample_ind,:]\n            non_c_ind =np.array([a for a in class_to_sum if a != c])\n            class_pred = cprobas[:,c]\n            max_non_class_pred = torch.max(cprobas[:,non_c_ind],dim = 1)[0]\n            TP = torch.sum(torch.clamp(class_pred - max_non_class_pred, max = hinge)+1.) + smooth\n            FN = torch.sum(torch.clamp(max_non_class_pred - class_pred, min = -hinge)+hinge)\n            \n            if (~c_sample_ind).sum() == 0:\n                FP = 0\n            else:\n                nonc_probas = vprobas[~c_sample_ind,:]\n                class_pred = nonc_probas[:,c]\n                max_non_class_pred = torch.max(nonc_probas[:,non_c_ind],dim = 1)[0]\n                FP = torch.sum(torch.clamp(class_pred - max_non_class_pred, max = hinge)+1.)\n            \n            losses.append(1 - TP/(TP+FP+FN))\n    \n    if len(losses) == 0: return 0\n    return mean(losses)\n\n# --------------------------- HELPER FUNCTIONS ---------------------------\ndef isnan(x):\n    return x != x\n    \n    \ndef mean(l, ignore_nan=False, empty=0):\n    \"\"\"\n    nanmean compatible with generators.\n    \"\"\"\n    l = iter(l)\n    if ignore_nan:\n        l = ifilterfalse(isnan, l)\n    try:\n        n = 1\n        acc = next(l)\n    except StopIteration:\n        if empty == 'raise':\n            raise ValueError('Empty mean')\n        return empty\n    for n, v in enumerate(l, 2):\n        acc += v\n    if n == 1:\n        return acc\n    return acc / n\n"
  },
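Usage sketch for `lovasz_softmax` above on toy data (assumes the repo's dependencies are importable; shapes and labels are made up).

import torch
import torch.nn.functional as F
from mmdet3d.models.fbbev.modules.occ_loss_utils.lovasz_softmax import lovasz_softmax

probas = F.softmax(torch.randn(1, 3, 4, 4), dim=1)   # [B, C, H, W] class probabilities
labels = torch.randint(0, 3, (1, 4, 4))              # [B, H, W] ground-truth labels
loss = lovasz_softmax(probas, labels, classes='present', ignore=255)
print(loss)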
  {
    "path": "mmdet3d/models/fbbev/modules/occ_loss_utils/nusc_param.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\n# nusc_class_frequencies = np.array([57330862, 25985376, 1561108, 28862014, 196106643, 15920504,\n#                 2158753, 26539491, 4004729, 34838681, 75173306, 2255027978, 50959399, 646022466, 869055679,\n#                 1446141335, 1724391378, 2242961742295])\n\n# nusc_class_frequencies = np.array([2242961742295, 25985376, 1561108, 28862014, 196106643, 15920504,\n#                 2158753, 26539491, 4004729, 34838681, 75173306, 2255027978, 50959399, 646022466, 869055679,\n#                 1446141335, 1724391378])\n\nnusc_class_frequencies = np.array([\n 944004,\n 1897170,\n 152386,\n 2391677,\n 16957802,\n 724139,\n 189027,\n 2074468,\n 413451,\n 2384460,\n 5916653,\n 175883646,\n 4275424,\n 51393615,\n 61411620,\n 105975596,\n 116424404,\n 1892500630\n ])\n\n\n# nusc_class_names = [\n#     \"noise\",\n#     \"barrier\",\n#     \"bicycle\",\n#     \"bus\",\n#     \"car\",\n#     \"construction\",\n#     \"motorcycle\",\n#     \"pedestrian\",\n#     \"trafficcone\",\n#     \"trailer\",\n#     \"truck\",\n#     \"driveable_surface\",\n#     \"other\",\n#     \"sidewalk\",\n#     \"terrain\",\n#     \"mannade\",\n#     \"vegetation\",\n#     \"free\",\n# ]\n\nnusc_class_names = [\n    \"empty\", # 0\n    \"barrier\", # 1\n    \"bicycle\", # 2 \n    \"bus\", # 3 \n    \"car\", # 4\n    \"construction\", # 5\n    \"motorcycle\", # 6\n    \"pedestrian\", # 7\n    \"trafficcone\", # 8\n    \"trailer\", # 9\n    \"truck\", # 10\n    \"driveable_surface\", # 11\n    \"other\", # 12\n    \"sidewalk\", # 13\n    \"terrain\", # 14\n    \"mannade\", # 15 \n    \"vegetation\", # 16\n]\n\n# classname_to_color = {  # RGB.\n#     0: (0, 0, 0),  # Black. noise\n#     1: (112, 128, 144),  # Slategrey barrier\n#     2: (220, 20, 60),  # Crimson bicycle\n#     3: (255, 127, 80),  # Orangered bus\n#     4: (255, 158, 0),  # Orange car\n#     5: (233, 150, 70),  # Darksalmon construction\n#     6: (255, 61, 99),  # Red motorcycle\n#     7: (0, 0, 230),  # Blue pedestrian\n#     8: (47, 79, 79),  # Darkslategrey trafficcone\n#     9: (255, 140, 0),  # Darkorange trailer\n#     10: (255, 99, 71),  # Tomato truck\n#     11: (0, 207, 191),  # nuTonomy green driveable_surface\n#     12: (175, 0, 75),  # flat other\n#     13: (75, 0, 75),  # sidewalk\n#     14: (112, 180, 60),  # terrain\n#     15: (222, 184, 135),  # Burlywood mannade\n#     16: (0, 175, 0),  # Green vegetation\n# }\nclassname_to_color = {  # RGB.\n    # 0: (0, 0, 0),  # Black. 
noise\n    1: (112, 128, 144),  # Slategrey barrier\n    2: (220, 20, 60),  # Crimson bicycle\n    3: (255, 127, 80),  # Orangered bus\n    4: (255, 158, 0),  # Orange car\n    5: (233, 150, 70),  # Darksalmon construction\n    6: (255, 61, 99),  # Red motorcycle\n    7: (0, 0, 230),  # Blue pedestrian\n    8: (47, 79, 79),  # Darkslategrey trafficcone\n    9: (255, 140, 0),  # Darkorange trailer\n    10: (255, 99, 71),  # Tomato truck\n    11: (0, 207, 191),  # nuTonomy green driveable_surface\n    12: (175, 0, 75),  # flat other\n    13: (75, 0, 75),  # sidewalk\n    14: (112, 180, 60),  # terrain\n    15: (222, 184, 135),  # Burlywood mannade\n    16: (0, 175, 0),  # Green vegetation\n}\n\ndef KL_sep(p, target):\n    \"\"\"\n    KL divergence on nonzeros classes\n    \"\"\"\n    nonzeros = target != 0\n    nonzero_p = p[nonzeros]\n    kl_term = F.kl_div(torch.log(nonzero_p), target[nonzeros], reduction=\"sum\")\n    return kl_term\n\n\ndef geo_scal_loss(pred, ssc_target):\n\n    # Get softmax probabilities\n    pred = F.softmax(pred, dim=1)\n\n    # Compute empty and nonempty probabilities\n    empty_probs = pred[:, 0, :, :, :]\n    nonempty_probs = 1 - empty_probs\n\n    # Remove unknown voxels\n    mask = ssc_target != 255\n    nonempty_target = ssc_target != 0\n    nonempty_target = nonempty_target[mask].float()\n    nonempty_probs = nonempty_probs[mask]\n    empty_probs = empty_probs[mask]\n\n    intersection = (nonempty_target * nonempty_probs).sum()\n    precision = intersection / nonempty_probs.sum()\n    recall = intersection / nonempty_target.sum()\n    spec = ((1 - nonempty_target) * (empty_probs)).sum() / (1 - nonempty_target).sum()\n    return (\n        F.binary_cross_entropy(precision, torch.ones_like(precision))\n        + F.binary_cross_entropy(recall, torch.ones_like(recall))\n        + F.binary_cross_entropy(spec, torch.ones_like(spec))\n    )\n\n\ndef sem_scal_loss(pred, ssc_target):\n    # Get softmax probabilities\n    pred = F.softmax(pred, dim=1)\n    loss = 0\n    count = 0\n    mask = ssc_target != 255\n    n_classes = pred.shape[1]\n    for i in range(0, n_classes):\n\n        # Get probability of class i\n        p = pred[:, i, :, :, :]\n\n        # Remove unknown voxels\n        target_ori = ssc_target\n        p = p[mask]\n        target = ssc_target[mask]\n\n        completion_target = torch.ones_like(target)\n        completion_target[target != i] = 0\n        completion_target_ori = torch.ones_like(target_ori).float()\n        completion_target_ori[target_ori != i] = 0\n        if torch.sum(completion_target) > 0:\n            count += 1.0\n            nominator = torch.sum(p * completion_target)\n            loss_class = 0\n            if torch.sum(p) > 0:\n                precision = nominator / (torch.sum(p))\n                loss_precision = F.binary_cross_entropy(\n                    precision, torch.ones_like(precision)\n                )\n                loss_class += loss_precision\n            if torch.sum(completion_target) > 0:\n                recall = nominator / (torch.sum(completion_target))\n                loss_recall = F.binary_cross_entropy(recall, torch.ones_like(recall))\n                loss_class += loss_recall\n            if torch.sum(1 - completion_target) > 0:\n                specificity = torch.sum((1 - p) * (1 - completion_target)) / (\n                    torch.sum(1 - completion_target)\n                )\n                loss_specificity = F.binary_cross_entropy(\n                    specificity, 
torch.ones_like(specificity)\n                )\n                loss_class += loss_specificity\n            loss += loss_class\n    return loss / count\n\n\ndef CE_ssc_loss(pred, target, class_weights):\n    \"\"\"\n    :param pred: the predicted tensor, must be [BS, C, H, W, D]\n    \"\"\"\n    criterion = nn.CrossEntropyLoss(\n        weight=class_weights, ignore_index=255, reduction=\"mean\"\n    )\n    loss = criterion(pred, target.long())\n\n    return loss\n"
  },
  {
    "path": "mmdet3d/models/fbbev/modules/occ_loss_utils/semkitti.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n# from mmcv.runner import BaseModule, force_fp32\nfrom torch.cuda.amp import autocast\n\nsemantic_kitti_class_frequencies = np.array(\n    [\n        5.41773033e09,\n        1.57835390e07,\n        1.25136000e05,\n        1.18809000e05,\n        6.46799000e05,\n        8.21951000e05,\n        2.62978000e05,\n        2.83696000e05,\n        2.04750000e05,\n        6.16887030e07,\n        4.50296100e06,\n        4.48836500e07,\n        2.26992300e06,\n        5.68402180e07,\n        1.57196520e07,\n        1.58442623e08,\n        2.06162300e06,\n        3.69705220e07,\n        1.15198800e06,\n        3.34146000e05,\n    ]\n)\n\nkitti_class_names = [\n    \"empty\",\n    \"car\",\n    \"bicycle\",\n    \"motorcycle\",\n    \"truck\",\n    \"other-vehicle\",\n    \"person\",\n    \"bicyclist\",\n    \"motorcyclist\",\n    \"road\",\n    \"parking\",\n    \"sidewalk\",\n    \"other-ground\",\n    \"building\",\n    \"fence\",\n    \"vegetation\",\n    \"trunk\",\n    \"terrain\",\n    \"pole\",\n    \"traffic-sign\",\n]\n\n\n\ndef inverse_sigmoid(x, sign='A'):\n    x = x.to(torch.float32)\n    while x >= 1-1e-5:\n        x = x - 1e-5\n\n    while x< 1e-5:\n        x = x + 1e-5\n\n    return -torch.log((1 / x) - 1)\n\ndef KL_sep(p, target):\n    \"\"\"\n    KL divergence on nonzeros classes\n    \"\"\"\n    nonzeros = target != 0\n    nonzero_p = p[nonzeros]\n    kl_term = F.kl_div(torch.log(nonzero_p), target[nonzeros], reduction=\"sum\")\n    return kl_term\n\n\ndef geo_scal_loss(pred, ssc_target, ignore_index=255, non_empty_idx=0):\n\n    # Get softmax probabilities\n    pred = F.softmax(pred, dim=1)\n\n    # Compute empty and nonempty probabilities\n    empty_probs = pred[:, non_empty_idx]\n    nonempty_probs = 1 - empty_probs\n\n    # Remove unknown voxels\n    mask = ssc_target != ignore_index\n    nonempty_target = ssc_target != non_empty_idx\n    nonempty_target = nonempty_target[mask].float()\n    nonempty_probs = nonempty_probs[mask]\n    empty_probs = empty_probs[mask]\n\n    eps = 1e-5\n    intersection = (nonempty_target * nonempty_probs).sum()\n    precision = intersection / (nonempty_probs.sum()+eps)\n    recall = intersection / (nonempty_target.sum()+eps)\n    spec = ((1 - nonempty_target) * (empty_probs)).sum() / ((1 - nonempty_target).sum()+eps)\n    with autocast(False):\n        return (\n            F.binary_cross_entropy_with_logits(inverse_sigmoid(precision, 'A'), torch.ones_like(precision))\n            + F.binary_cross_entropy_with_logits(inverse_sigmoid(recall, 'B'), torch.ones_like(recall))\n            + F.binary_cross_entropy_with_logits(inverse_sigmoid(spec, 'C'), torch.ones_like(spec))\n        )\n\n\n\ndef sem_scal_loss(pred_, ssc_target, ignore_index=255):\n    # Get softmax probabilities\n    with autocast(False):\n        pred = F.softmax(pred_, dim=1)\n        loss = 0\n        count = 0\n        mask = ssc_target != ignore_index\n        n_classes = pred.shape[1]\n        begin = 1 if n_classes == 19 else 0\n        for i in range(begin, n_classes-1):   \n\n            # Get probability of class i\n            p = pred[:, i]  \n\n            # Remove unknown voxels\n            target_ori = ssc_target\n            p = p[mask]\n            target = ssc_target[mask]   \n\n            completion_target = torch.ones_like(target)\n            completion_target[target != i] = 0\n            completion_target_ori = torch.ones_like(target_ori).float()\n           
 completion_target_ori[target_ori != i] = 0\n            if torch.sum(completion_target) > 0:\n                count += 1.0\n                nominator = torch.sum(p * completion_target)\n                loss_class = 0\n                if torch.sum(p) > 0:\n                    precision = nominator / (torch.sum(p)+ 1e-5)\n                    loss_precision = F.binary_cross_entropy_with_logits(\n                            inverse_sigmoid(precision, 'D'), torch.ones_like(precision)\n                        )\n                    loss_class += loss_precision\n                if torch.sum(completion_target) > 0:\n                    recall = nominator / (torch.sum(completion_target) +1e-5)\n                    # loss_recall = F.binary_cross_entropy(recall, torch.ones_like(recall))\n\n                    loss_recall = F.binary_cross_entropy_with_logits(inverse_sigmoid(recall, 'E'), torch.ones_like(recall))\n                    loss_class += loss_recall\n                if torch.sum(1 - completion_target) > 0:\n                    specificity = torch.sum((1 - p) * (1 - completion_target)) / (\n                        torch.sum(1 - completion_target) +  1e-5\n                    )\n\n                    loss_specificity = F.binary_cross_entropy_with_logits(\n                            inverse_sigmoid(specificity, 'F'), torch.ones_like(specificity)\n                        )\n                    loss_class += loss_specificity\n                loss += loss_class\n                # print(i, loss_class, loss_recall, loss_specificity)\n        l = loss/count\n        if torch.isnan(l):\n            from IPython import embed\n            embed()\n            exit()\n        return l\n\n\ndef CE_ssc_loss(pred, target, class_weights=None, ignore_index=255):\n    \"\"\"\n    :param: prediction: the predicted tensor, must be [BS, C, ...]\n    \"\"\"\n\n    criterion = nn.CrossEntropyLoss(\n        weight=class_weights, ignore_index=ignore_index, reduction=\"mean\"\n    )\n    # from IPython import embed\n    # embed()\n    # exit()\n    with autocast(False):\n        loss = criterion(pred, target.long())\n\n    return loss\n\ndef vel_loss(pred, gt):\n    with autocast(False):\n        return F.l1_loss(pred, gt)\n\n\n\n"
  },
  {
    "path": "mmdet3d/models/fbbev/modules/resnet3d.py",
    "content": "import math\nfrom functools import partial\nfrom mmdet3d.models.builder import BACKBONES\nfrom mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer\nfrom mmcv.runner import BaseModule\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.utils.checkpoint import checkpoint as cp\nimport pdb\nfrom mmcv.runner import BaseModule\nimport spconv.pytorch as spconv\nfrom spconv.pytorch import functional as Fsp\nfrom mmcv.runner import BaseModule, force_fp32\ndef get_inplanes():\n    return [64, 128, 256, 512]\n\nBIAS = True\ndef conv3x3x3(in_planes, out_planes, stride=1, use_spase_3dtensor=False):\n    if not use_spase_3dtensor:\n        Conv3d = nn.Conv3d\n    else:\n        Conv3d = spconv.SparseConv3d if stride!=1 else spconv.SubMConv3d\n\n    return Conv3d(in_planes,\n                     out_planes,\n                     kernel_size=3,\n                     stride=stride,\n                     padding=1,\n                     bias=BIAS)\n\n\ndef conv1x1x1(in_planes, out_planes, stride=1, use_spase_3dtensor=False):\n    if not use_spase_3dtensor:\n        Conv3d = nn.Conv3d\n    else:\n        Conv3d = spconv.SparseConv3d if stride!=1 else spconv.SubMConv3d\n\n    return Conv3d(in_planes,\n                     out_planes,\n                     kernel_size=1,\n                     stride=stride,\n                     bias=BIAS)\n\n\nclass BasicBlock(BaseModule):\n    expansion = 1\n\n    def __init__(self, in_planes, planes, stride=1, downsample=None, norm_cfg=None, use_spase_3dtensor=False):\n        super().__init__()\n\n        self.use_spase_3dtensor = use_spase_3dtensor\n        self.relu = nn.ReLU(inplace=False)\n        self.downsample = downsample\n\n        if self.use_spase_3dtensor:\n            Sequential = spconv.SparseSequential\n\n            conv1 = conv3x3x3(in_planes, planes, stride, use_spase_3dtensor=self.use_spase_3dtensor)\n            bn1 = build_norm_layer(norm_cfg, planes)[1]\n            relu = nn.ReLU(inplace=True)\n            conv2 = conv3x3x3(planes, planes, use_spase_3dtensor=self.use_spase_3dtensor)\n            bn2 = build_norm_layer(norm_cfg, planes)[1]\n            layer_list = [conv1, bn1, relu, conv2, bn2]\n            \n            self.layer_seq = Sequential(*layer_list)\n        else:\n            self.conv1 = conv3x3x3(in_planes, planes, stride, use_spase_3dtensor=self.use_spase_3dtensor)\n            self.bn1 = build_norm_layer(norm_cfg, planes)[1]\n            \n            self.conv2 = conv3x3x3(planes, planes, use_spase_3dtensor=self.use_spase_3dtensor)\n            self.bn2 = build_norm_layer(norm_cfg, planes)[1]\n\n        self.stride = stride\n\n    @force_fp32()\n    def forward(self, x, debug=False):\n        residual = x\n\n        if self.use_spase_3dtensor:\n            out = self.layer_seq(x)\n            if self.downsample is not None:\n                residual = self.downsample(x)\n            out = Fsp.sparse_add(out, residual)\n            out = out.replace_feature(self.relu(out.features))\n            return out\n        else:\n            out = self.conv1(x)\n            out = self.bn1(out)\n            out = self.relu(out)\n\n            out = self.conv2(out)\n            out = self.bn2(out)\n\n            if self.downsample is not None:\n                residual = self.downsample(x)\n            out += residual\n            out = self.relu(out)\n            return out\n\n\n\n\nclass Bottleneck(BaseModule):\n    expansion = 4\n\n    def __init__(self, in_planes, planes, 
stride=1, downsample=None, norm_cfg=None):\n        super().__init__()\n\n        self.conv1 = conv1x1x1(in_planes, planes)\n        self.bn1 = build_norm_layer(norm_cfg, planes)[1]\n        self.conv2 = conv3x3x3(planes, planes, stride)\n        self.bn2 = build_norm_layer(norm_cfg, planes)[1]\n        self.conv3 = conv1x1x1(planes, planes * self.expansion)\n        self.bn3 = build_norm_layer(norm_cfg, planes * self.expansion)[1]\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n    @force_fp32()\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n@BACKBONES.register_module()\nclass CustomResNet3D(BaseModule):\n    def __init__(self,\n                 depth,\n                 block_inplanes=[64, 128, 256, 512],\n                 block_strides=[1, 2, 2, 2],\n                 out_indices=(0, 1, 2, 3),\n                 n_input_channels=3,\n                 shortcut_type='B',\n                 with_cp=False,\n                 norm_cfg=dict(type='BN3d', requires_grad=True),\n                 use_spase_3dtensor=False,\n                 plane2voxel=None,\n                 widen_factor=1.0):\n        super().__init__()\n        \n        layer_metas = {\n            10: [1, 1, 1, 1],\n            18: [2, 2, 2, 2],\n            34: [3, 4, 6, 3],\n            50: [3, 4, 6, 3],\n            101: [3, 4, 23, 3],\n        }\n        \n        if depth in [10, 18, 34]:\n            block = BasicBlock\n        else:\n            assert depth in [50, 101]\n            block = Bottleneck\n        \n        self.with_cp = with_cp \n        self.plane2voxel = plane2voxel\n        \n            \n        layers = layer_metas[depth]\n        self.use_spase_3dtensor = use_spase_3dtensor\n        block_inplanes = [int(x * widen_factor) for x in block_inplanes]\n        self.in_planes = block_inplanes[0]\n        self.out_indices = out_indices\n        \n        # replace the first several downsampling layers with the channel-squeeze layers\n        Conv3d = nn.Conv3d if not self.use_spase_3dtensor else spconv.SubMConv3d\n        Sequential = nn.Sequential if not self.use_spase_3dtensor else spconv.SparseSequential\n        if self.use_spase_3dtensor:\n            norm_cfg['type'] = 'BN1d'\n\n        self.input_proj = Sequential(\n            Conv3d(n_input_channels, self.in_planes, kernel_size=(1, 1, 1),\n                      stride=(1, 1, 1), bias=False),\n            build_norm_layer(norm_cfg, self.in_planes)[1],\n            nn.ReLU(inplace=True),\n        )\n        \n        self.layers = nn.ModuleList()\n        for i in range(len(block_inplanes)):\n            self.layers.append(self._make_layer(block, block_inplanes[i], layers[i], \n                                shortcut_type, block_strides[i], norm_cfg=norm_cfg))\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv3d):\n                nn.init.kaiming_normal_(m.weight,\n                                        mode='fan_out',\n                                        nonlinearity='relu')\n            \n            elif isinstance(m, nn.BatchNorm3d):\n                
nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n    def _downsample_basic_block(self, x, planes, stride):\n        out = F.avg_pool3d(x, kernel_size=1, stride=stride)\n        zero_pads = torch.zeros(out.size(0), planes - out.size(1), out.size(2),\n                                out.size(3), out.size(4))\n        if isinstance(out.data, torch.cuda.FloatTensor):\n            zero_pads = zero_pads.cuda()\n\n        out = torch.cat([out.data, zero_pads], dim=1)\n\n        return out\n\n    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1, norm_cfg=None):\n        downsample = None\n        Sequential = nn.Sequential if not self.use_spase_3dtensor else spconv.SparseSequential\n        if stride != 1 or self.in_planes != planes * block.expansion:\n            if shortcut_type == 'A':\n                downsample = partial(self._downsample_basic_block,\n                                     planes=planes * block.expansion,\n                                     stride=stride)\n            else:\n                \n                downsample = Sequential(\n                    conv1x1x1(self.in_planes, planes * block.expansion, stride, self.use_spase_3dtensor),\n                    build_norm_layer(norm_cfg, planes * block.expansion)[1])\n\n        layers = []\n        layers.append(\n            block(in_planes=self.in_planes,\n                  planes=planes,\n                  stride=stride,\n                  downsample=downsample,\n                  use_spase_3dtensor = self.use_spase_3dtensor,\n                  norm_cfg=norm_cfg))\n        self.in_planes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.in_planes, planes, norm_cfg=norm_cfg, use_spase_3dtensor = self.use_spase_3dtensor))\n\n        return Sequential(*layers)\n    \n    @force_fp32()\n    def forward(self, x):\n        if self.plane2voxel is not None:\n            x = x.unsqueeze(-1).repeat(1, 1, 1, 1, self.plane2voxel)\n        x = self.input_proj(x)\n        res = []\n        for index, layer in enumerate(self.layers):\n            if self.use_spase_3dtensor:\n                for block in layer:\n                    if self.with_cp:\n                        x = cp(block, x)\n                    else:\n                        x = block(x)\n            else:\n                if self.with_cp:\n                    x = cp(layer, x)\n                else:\n                    x = layer(x)\n            \n            if index in self.out_indices:\n                if self.use_spase_3dtensor:\n                    res.append(x.dense())\n                else:\n                    res.append(x)\n\n        return res\n\ndef generate_model(model_depth, **kwargs):\n    assert model_depth in [10, 18, 34, 50, 101, 152, 200]\n\n    if model_depth == 10:\n        model = ResNet(BasicBlock, [1, 1, 1, 1], get_inplanes(), **kwargs)\n    elif model_depth == 18:\n        model = ResNet(BasicBlock, [2, 2, 2, 2], get_inplanes(), **kwargs)\n    elif model_depth == 34:\n        model = ResNet(BasicBlock, [3, 4, 6, 3], get_inplanes(), **kwargs)\n    elif model_depth == 50:\n        model = ResNet(Bottleneck, [3, 4, 6, 3], get_inplanes(), **kwargs)\n    elif model_depth == 101:\n        model = ResNet(Bottleneck, [3, 4, 23, 3], get_inplanes(), **kwargs)\n    elif model_depth == 152:\n        model = ResNet(Bottleneck, [3, 8, 36, 3], get_inplanes(), **kwargs)\n    elif model_depth == 200:\n        model = ResNet(Bottleneck, [3, 24, 36, 3], get_inplanes(), 
**kwargs)\n\n    return model"
  },
  {
    "path": "mmdet3d/models/fbbev/motion_head/__init__.py",
    "content": "from .motion_head import MotionHead\nfrom .motion_planner_head import MotionPlannerHead\nfrom .traj_loss import TrajLoss"
  },
  {
    "path": "mmdet3d/models/fbbev/motion_head/motion_head.py",
    "content": "import torch\nimport torch.nn as nn \nfrom mmcv.cnn import Linear, bias_init_with_prob, Scale\n\nfrom mmcv.runner import force_fp32\nfrom mmdet.core import (build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\n# from .streampetr_utils import *\nimport copy\nfrom mmdet.models.utils import NormedLinear\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.fbbev.utils import save_tensor\nfrom mmcv.runner.base_module import BaseModule\nfrom mmcv.cnn.bricks.transformer import build_transformer_layer_sequence\n\ndef get_ego_pos(points, pc_range):\n    if points.size(-1) == 3:\n        points = points * (pc_range[3:6] - pc_range[0:3]) + pc_range[0:3]\n    elif  points.size(-1) == 2:\n        points = points * (pc_range[3:5] - pc_range[0:2]) + pc_range[0:2]\n    return points\n\ndef get_rel_pos(points, pc_range):\n    if points.size(-1) == 3:\n        return (points - pc_range[0:3]) / (pc_range[3:6] - pc_range[0:3])\n    elif  points.size(-1) == 2:\n        return (points - pc_range[0:2]) / (pc_range[3:5] - pc_range[0:2])\n\n\n@HEADS.register_module()\nclass MotionHead(BaseModule):\n    \"\"\"Implements the DETR transformer head.\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n    Args:\n        num_classes (int): Number of categories excluding the background.\n        in_channels (int): Number of channels in the input feature map.\n        num_query (int): Number of query in Transformer.\n        num_reg_fcs (int, optional): Number of fully-connected layers used in\n            `FFN`, which is then used for the regression head. Default 2.\n        transformer (obj:`mmcv.ConfigDict`|dict): Config for transformer.\n            Default: None.\n        sync_cls_avg_factor (bool): Whether to sync the avg_factor of\n            all ranks. Default to False.\n        positional_encoding (obj:`mmcv.ConfigDict`|dict):\n            Config for position encoding.\n        loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the\n            classification loss. Default `CrossEntropyLoss`.\n        loss_bbox (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression loss. Default `L1Loss`.\n        loss_iou (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression iou loss. 
Default `GIoULoss`.\n        tran_cfg (obj:`mmcv.ConfigDict`|dict): Training config of\n            transformer head.\n        test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of\n            transformer head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n    _version = 2\n\n    def __init__(self,\n                 # num_classes=1,\n                 in_channels=256,\n                 stride=[16],\n                 embed_dims=256,\n                 num_query=6,\n                 num_reg_fcs=2,\n                 memory_len=12,\n                 topk_proposals=4,\n                 num_propagated=0,\n                 with_dn=True,\n                 with_ego_pos=True,\n                 match_with_velo=True,\n                 match_costs=None,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 code_weights=None,\n                 bbox_coder=None,\n                 loss_traj=dict(type='L1Loss', loss_weight=0.25),\n                 init_cfg=None,\n                 normedlinear=False,\n                 point_cloud_range=None,\n                 agent_decoder=dict(),\n                 agent_map_decoder=dict(),\n                 map_layer_index = -1,\n                 **kwargs):\n\n        if 'code_size' in kwargs:\n            self.code_size = kwargs['code_size']\n        else:\n            self.code_size = 2\n        # if code_weights is not None:\n        #     self.code_weights = code_weights\n        # else:\n        #     self.code_weights = [1.0, 1.0] # x, y, v_x, v_y\n        # self.code_weights = self.code_weights[:self.code_size]\n\n\n        self.traj_num_cls = 1\n\n        self.num_query = num_query\n        self.in_channels = in_channels\n        self.num_reg_fcs = num_reg_fcs\n        # self.train_cfg = train_cfg\n        # self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.embed_dims = embed_dims\n        self.map_layer_index = map_layer_index\n\n\n        super(MotionHead, self).__init__()\n\n        self.loss_traj = build_loss(loss_traj)\n        self.log_softmax = nn.LogSoftmax(dim=2)\n        # self.code_weights = nn.Parameter(torch.tensor(\n        #     self.code_weights), requires_grad=False)\n        self.pc_range = nn.Parameter(torch.tensor(\n            point_cloud_range), requires_grad=False)\n\n        self.fut_steps = 8\n        self.num_fut_mode = 6\n\n\n        self.agent_decoder = build_transformer_layer_sequence(agent_decoder)\n        self.agent_map_decoder = build_transformer_layer_sequence(agent_map_decoder)\n\n        self._init_layers()\n        self.count = 0\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n\n        traj_branch = []\n        for _ in range(self.num_reg_fcs):\n            traj_branch.append(Linear(self.embed_dims*2, self.embed_dims*2))\n            traj_branch.append(nn.ReLU())\n        traj_branch.append(Linear(self.embed_dims*2, self.fut_steps*self.code_size))\n        traj_branch = nn.Sequential(*traj_branch)\n\n        traj_cls_branch = []\n        for _ in range(self.num_reg_fcs):\n            traj_cls_branch.append(Linear(self.embed_dims*2, self.embed_dims*2))\n            traj_cls_branch.append(nn.LayerNorm(self.embed_dims*2))\n            traj_cls_branch.append(nn.ReLU(inplace=True))\n        traj_cls_branch.append(Linear(self.embed_dims*2, self.traj_num_cls))\n        traj_cls_branch = nn.Sequential(*traj_cls_branch)\n\n        def 
_get_clones(module, N):\n            return nn.ModuleList([copy.deepcopy(module) for i in range(N)])\n\n        motion_num_pred = 2\n        self.traj_branches = _get_clones(traj_branch, motion_num_pred)\n        self.traj_cls_branches = _get_clones(traj_cls_branch, motion_num_pred)\n\n        # self.reference_points = nn.Embedding(self.num_query, 3)\n        self.agent_info = MLN(17)\n        self.agent_info_embedding = nn.Sequential(\n            nn.Linear(17, self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n\n        self.traj_mode_embedding = nn.Embedding(self.num_fut_mode, self.embed_dims)\n\n\n    def forward(self, agent_instances, preds_map_dicts, img_metas=None):\n        \n        valid_length = [(matched_gt_idxes>=0).sum() for matched_gt_idxes in agent_instances.matched_gt_idxes]\n\n        max_valid_query = max(valid_length)\n        \n        assert 0<=max_valid_query<=250\n        agent_instances = agent_instances[:, :max_valid_query]\n        agent_queries = agent_instances.query_feats\n        agent_reference_points = agent_instances.reference_points\n        mode_embedding = self.traj_mode_embedding.weight\n        hist_mask = agent_instances.hist_mask\n        B = len(agent_instances)\n        hist_xyz_delta = (agent_instances.hist_xyz[:, :, 1:] - agent_instances.hist_xyz[:, :, :-1]) * hist_mask[:,:, :-1, None]\n        agent_hist_info = torch.cat([hist_xyz_delta.flatten(-2, -1), agent_instances.hist_velo.flatten(-2, -1)], -1).detach()\n        \n        # I do believe this agent history infomation can be helpfull, so I use it twice\n        agent_queries = self.agent_info(agent_queries, agent_hist_info)\n        extra_agent_infos = (self.agent_info_embedding(agent_hist_info)[:, :, None, :].repeat(1, 1, self.num_fut_mode, 1)).flatten(1, 2)\n\n        agent_queries = (agent_queries[:, :, None, :] + mode_embedding[None, None, :, :]).flatten(1, 2)\n\n        hist_traj_points = agent_instances.hist_xyz.unsqueeze(2).repeat(1, 1, self.num_fut_mode, 1, 1).flatten(1, 2)\n        hist_agent_xy = agent_instances.reference_points[:, :, :2].unsqueeze(2).repeat(1, 1, self.num_fut_mode, 1).flatten(1, 2)\n\n        agent_queries = self.agent_decoder(agent_queries, reference_points_q=hist_traj_points, reference_points_v=hist_traj_points, pc_range=self.pc_range)\n        \n        map_queries = preds_map_dicts[self.map_layer_index]['queries'].clone()\n        map_lines = preds_map_dicts[self.map_layer_index]['lines'].clone()\n        map_scores = preds_map_dicts[self.map_layer_index]['scores'].clone()\n        B, NMQ, K2 = map_lines.shape\n        map_lines = map_lines.reshape(B, NMQ, K2//2, 2)\n        map_lines = get_ego_pos(map_lines, self.pc_range)\n\n        co_agent_queries = torch.cat([agent_queries, extra_agent_infos], -1)\n        pred_traj_cls = self.traj_cls_branches[0](co_agent_queries).view(B, max_valid_query, self.num_fut_mode)\n        pred_traj_cls = self.log_softmax(pred_traj_cls)\n        pred_traj = self.traj_branches[0](co_agent_queries)\n        B, N, PK = pred_traj.shape\n        pred_traj = pred_traj.view(B, N, PK//self.code_size, self.code_size)\n\n        fut_traj_points = torch.cat([hist_agent_xy.unsqueeze(-2), pred_traj[..., :2]], -2)\n        fut_traj_points = torch.cumsum(fut_traj_points, -2)[:, :, 1:]\n\n        agent_queries = self.agent_map_decoder(agent_queries, map_queries, map_queries, reference_points_q=fut_traj_points, reference_points_v=map_lines, pc_range=self.pc_range, 
map_scores=map_scores)\n        co_agent_queries = torch.cat([agent_queries, extra_agent_infos], -1)\n        pred_opt_traj_cls = self.traj_cls_branches[1](co_agent_queries).view(B, max_valid_query, self.num_fut_mode)\n        pred_opt_traj_cls = self.log_softmax(pred_opt_traj_cls)\n        pred_opt_traj = self.traj_branches[1](co_agent_queries)\n\n        pred_opt_traj = pred_opt_traj.view(B, N, PK//self.code_size, self.code_size)\n\n        fut_opt_traj_points = torch.cat([hist_agent_xy.unsqueeze(-2), pred_opt_traj[..., :2]], -2)\n        fut_opt_traj_points = torch.cumsum(fut_opt_traj_points, -2)[:, :, 1:]\n\n        return dict(\n            pred_trajs=[\n                dict(\n                    pred_traj=pred_traj.view(B, N//self.num_fut_mode, self.num_fut_mode,  PK//self.code_size, self.code_size),\n                    pred_traj_cls=pred_traj_cls,\n                    valid_length=valid_length,\n                ),\n                dict(\n                    pred_traj=pred_opt_traj.view(B, N//self.num_fut_mode, self.num_fut_mode, PK//self.code_size, self.code_size),\n                    pred_traj_cls=pred_opt_traj_cls,\n                    valid_length=valid_length,\n            )],\n            pred_abs_trajs = fut_traj_points,\n            pred_abs_trajs2 = fut_opt_traj_points,\n            obj_idxes = agent_instances.obj_idxes.clone(),\n            motion_queries = agent_queries,\n            agent_logits = agent_instances.logits.clone()\n            )\n        \n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self,\n             gt_agent_fut_traj,\n             gt_agent_fut_traj_mask,\n             preds_dicts,\n             matched_gt_idxes=None,\n             img_metas=None,\n            ):\n\n        loss_dict = dict()\n        gt_agent_fut_traj_list = []\n        gt_agent_fut_traj_mask_list = []\n        B = len(gt_agent_fut_traj)\n        pred_trajs = preds_dicts['pred_trajs']\n        valid_length = pred_trajs[0]['valid_length']\n\n        for i in range(B):\n            index = matched_gt_idxes[i][:valid_length[i]]\n            if valid_length[i]>0:\n                gt_agent_fut_traj_list.append(gt_agent_fut_traj[i][:valid_length[i]][index])\n                gt_agent_fut_traj_mask_list.append(gt_agent_fut_traj_mask[i][:valid_length[i]][index])\n\n        # from IPython import embed\n        # embed()\n        # exit()\n\n        gt_agent_fut_traj = torch.cat(gt_agent_fut_traj_list)\n        gt_agent_fut_traj_mask = torch.cat(gt_agent_fut_traj_mask_list).sum(-1) > 0\n\n        for lld, single_preds in  enumerate(pred_trajs):\n            pred_traj = single_preds['pred_traj']\n            pred_traj_cls = single_preds['pred_traj_cls']\n            pred_agent_fut_traj_list = []\n            pred_agent_fut_traj_cls_list = []\n\n            for i in range(B):\n                if valid_length[i]>0:\n                    pred_agent_fut_traj_list.append(pred_traj[i][:valid_length[i]])\n                    pred_agent_fut_traj_cls_list.append(pred_traj_cls[i][:valid_length[i]])\n\n            pred_traj = torch.cat(pred_agent_fut_traj_list)\n            pred_traj_cls = torch.cat(pred_agent_fut_traj_cls_list)\n            loss_traj, l_class, l_reg, l_minade, l_minfde, l_mr = self.loss_traj(pred_traj_cls, pred_traj, gt_agent_fut_traj, gt_agent_fut_traj_mask)\n            loss_dict.update({\n                    f'loss_traj.d{lld}': loss_traj,\n                    f'l_class.d{lld}': l_class,\n                    f'l_reg.d{lld}': l_reg,\n                    f'l_minade.d{lld}': 
l_minade,\n                    f'l_minfde.d{lld}': l_minfde,\n                    f'l_mr.d{lld}': l_mr,\n                }\n            )\n        return loss_dict\n\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_bboxes(self, preds_dicts, img_metas,  rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        num_samples = len(img_metas)\n        fut_traj_points = preds_dicts['pred_abs_trajs'].view(num_samples, -1, self.num_fut_mode,  self.fut_steps, self.code_size)\n        # fut_traj_index = preds_dicts['pred_trajs'][0]['pred_traj_cls'].softmax(-1).argmax(-1)\n        scores, fut_traj_index = torch.max(preds_dicts['pred_trajs'][0]['pred_traj_cls'].softmax(-1), dim=-1)\n        inds_rep = fut_traj_index.repeat(\n            self.fut_steps, self.code_size, 1, 1).permute(2, 3, 0, 1)\n        fut_traj_points = fut_traj_points.gather(2, inds_rep.unsqueeze(2)).squeeze(dim=2)\n\n        fut_traj_points2 = preds_dicts['pred_abs_trajs2'].view(num_samples, -1, self.num_fut_mode,  self.fut_steps, self.code_size)\n        # fut_traj_index = preds_dicts['pred_trajs'][0]['pred_traj_cls'].softmax(-1).argmax(-1)\n        scores, fut_traj_index = torch.max(preds_dicts['pred_trajs'][1]['pred_traj_cls'].softmax(-1), dim=-1)\n        inds_rep = fut_traj_index.repeat(\n            self.fut_steps, self.code_size, 1, 1).permute(2, 3, 0, 1)\n        fut_traj_points2 = fut_traj_points2.gather(2, inds_rep.unsqueeze(2)).squeeze(dim=2)\n\n        ret_list = [] \n        for i in range(num_samples):\n            ret_list.append(\n                dict(\n                    pred_agent_fut_trajs = fut_traj_points[i].cpu().numpy(),\n                    pred_agent_fut_trajs2 = fut_traj_points2[i].cpu().numpy(),\n                    obj_idxes = preds_dicts['obj_idxes'][i].cpu().numpy()\n                    )\n                )\n        return ret_list\n\nclass MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256, use_ln=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.use_ln = use_ln\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        if self.use_ln:\n            self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.init_weight()\n\n    def init_weight(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        if self.use_ln:\n            x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out"
  },
  {
    "path": "mmdet3d/models/fbbev/motion_head/motion_planner_head.py",
    "content": "import torch\nimport torch.nn as nn \nfrom mmcv.cnn import Linear, bias_init_with_prob, Scale\n\nfrom mmcv.runner import force_fp32\nfrom mmdet.core import (build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\n# from .streampetr_utils import *\nimport copy\nfrom mmdet.models.utils import NormedLinear\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.fbbev.utils import save_tensor\nfrom mmcv.runner.base_module import BaseModule\nfrom mmcv.cnn.bricks.transformer import build_transformer_layer_sequence\nfrom ..streampetr.streampetr_utils import *\nfrom ..planner_head.metric_stp3 import PlanningMetric\n\ndef get_ego_pos(points, pc_range):\n    if points.size(-1) == 3:\n        points = points * (pc_range[3:6] - pc_range[0:3]) + pc_range[0:3]\n    elif  points.size(-1) == 2:\n        points = points * (pc_range[3:5] - pc_range[0:2]) + pc_range[0:2]\n    return points\n\ndef get_rel_pos(points, pc_range):\n    if points.size(-1) == 3:\n        return (points - pc_range[0:3]) / (pc_range[3:6] - pc_range[0:3])\n    elif  points.size(-1) == 2:\n        return (points - pc_range[0:2]) / (pc_range[3:5] - pc_range[0:2])\n\n\n@HEADS.register_module()\nclass MotionPlannerHead(BaseModule):\n    \"\"\"Implements the DETR transformer head.\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n    Args:\n        num_classes (int): Number of categories excluding the background.\n        in_channels (int): Number of channels in the input feature map.\n        num_query (int): Number of query in Transformer.\n        num_reg_fcs (int, optional): Number of fully-connected layers used in\n            `FFN`, which is then used for the regression head. Default 2.\n        transformer (obj:`mmcv.ConfigDict`|dict): Config for transformer.\n            Default: None.\n        sync_cls_avg_factor (bool): Whether to sync the avg_factor of\n            all ranks. Default to False.\n        positional_encoding (obj:`mmcv.ConfigDict`|dict):\n            Config for position encoding.\n        loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the\n            classification loss. Default `CrossEntropyLoss`.\n        loss_bbox (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression loss. Default `L1Loss`.\n        loss_iou (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression iou loss. 
Default `GIoULoss`.\n        tran_cfg (obj:`mmcv.ConfigDict`|dict): Training config of\n            transformer head.\n        test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of\n            transformer head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n    _version = 2\n\n    def __init__(self,\n                 # num_classes=1,\n                 in_channels=256,\n                 stride=[16],\n                 embed_dims=256,\n                 num_query=6,\n                 num_reg_fcs=2,\n                 memory_len=12,\n                 topk_proposals=4,\n                 num_propagated=0,\n                 with_dn=True,\n                 with_ego_pos=True,\n                 match_with_velo=True,\n                 match_costs=None,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 code_weights=None,\n                 bbox_coder=None,\n                 loss_traj=dict(type='L1Loss', loss_weight=0.25),\n                 init_cfg=None,\n                 normedlinear=False,\n                 point_cloud_range=None,\n                 agent_decoder=dict(),\n                 agent_map_decoder=dict(),\n                 map_layer_index = -1,\n\n                # planner\n                 loss_plan_reg=dict(type='L1Loss', loss_weight=5.0),\n                 loss_plan_bound=dict(type='PlanMapBoundLoss', loss_weight=1.0, dis_thresh=5.0),\n                 loss_plan_col=dict(type='PlanCollisionLoss', loss_weight=5.0),\n                 loss_plan_dir=dict(type='PlanMapDirectionLoss', loss_weight=2.5),\n                 ego_agent_decoder = dict(\n                    type='CustomTransformerDecoder',\n                    num_layers=1,\n                    return_intermediate=False,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        batch_first=True,\n                        attn_cfgs=dict(\n                            type='MotionSelfAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1,\n                            dist_func_type='MDE',\n                            pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],\n                            consider_map_quality=False,\n                        ),\n                        feedforward_channels=2048,\n                        ffn_dropout=0.1,\n                        operation_order=('cross_attn', 'norm', 'ffn', 'norm'))),\n                 ego_map_decoder = dict(\n                    type='CustomTransformerDecoder',\n                    num_layers=1,\n                    return_intermediate=False,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        batch_first=True,\n                        attn_cfgs=dict(\n                            type='MotionSelfAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1,\n                            dist_func_type='MDE',\n                            pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],\n                            consider_map_quality=True,\n                        ),\n                    feedforward_channels=2048,\n                    ffn_dropout=0.1,\n                    operation_order=('cross_attn', 'norm', 'ffn', 'norm'))),\n                 ego_ego_decoder = dict(\n     
               type='CustomTransformerDecoder',\n                    num_layers=1,\n                    return_intermediate=False,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        batch_first=True,\n                        attn_cfgs=dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        feedforward_channels=1024,\n                        ffn_dropout=0.1,\n                        operation_order=('cross_attn', 'norm', 'ffn', 'norm'))),\n                 **kwargs):\n\n        if 'code_size' in kwargs:\n            self.code_size = kwargs['code_size']\n        else:\n            self.code_size = 2\n        # if code_weights is not None:\n        #     self.code_weights = code_weights\n        # else:\n        #     self.code_weights = [1.0, 1.0] # x, y, v_x, v_y\n        # self.code_weights = self.code_weights[:self.code_size]\n\n\n        self.traj_num_cls = 1\n\n        self.num_query = num_query\n        self.in_channels = in_channels\n        self.num_reg_fcs = num_reg_fcs\n        # self.train_cfg = train_cfg\n        # self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.embed_dims = embed_dims\n        self.map_layer_index = map_layer_index\n\n\n        super(MotionPlannerHead, self).__init__()\n\n        self.loss_traj = build_loss(loss_traj)\n        self.log_softmax = nn.LogSoftmax(dim=2)\n        # self.code_weights = nn.Parameter(torch.tensor(\n        #     self.code_weights), requires_grad=False)\n        self.pc_range = nn.Parameter(torch.tensor(\n            point_cloud_range), requires_grad=False)\n\n        self.fut_steps = 8\n        self.num_fut_mode = 6\n\n\n        self.agent_decoder = build_transformer_layer_sequence(agent_decoder)\n        self.agent_map_decoder = build_transformer_layer_sequence(agent_map_decoder)\n\n        self._init_layers()\n        self.count = 0\n\n        # planner\n        self.ego_ego_decoder = build_transformer_layer_sequence(ego_ego_decoder)\n        self.ego_agent_decoder = build_transformer_layer_sequence(ego_agent_decoder)\n        self.ego_map_decoder = build_transformer_layer_sequence(ego_map_decoder)\n        self.ego_fut_steps = 6\n        self.ego_fut_mode = 3\n        self.memory_len = 4\n        self.loss_plan_reg = build_loss(loss_plan_reg)\n        loss_plan_bound.update(point_cloud_range=point_cloud_range)\n        loss_plan_col.update(point_cloud_range=point_cloud_range)\n        loss_plan_dir.update(point_cloud_range=point_cloud_range)\n        self.loss_plan_bound = build_loss(loss_plan_bound)\n        self.loss_plan_col = build_loss(loss_plan_col)\n        self.loss_plan_dir = build_loss(loss_plan_dir)\n        self.ego_info = MLN(3)\n        self._init_planer_layers()\n        self.memory_traj = None\n        self.planning_metric = PlanningMetric()\n        self.count = 0\n\n    def _init_planer_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n\n        ego_fut_decoder = []\n        ego_fut_dec_in_dim = self.embed_dims*2\n        for _ in range(self.num_reg_fcs):\n            ego_fut_decoder.append(Linear(ego_fut_dec_in_dim, ego_fut_dec_in_dim))\n            ego_fut_decoder.append(nn.ReLU())\n        ego_fut_decoder.append(Linear(ego_fut_dec_in_dim, 
self.ego_fut_mode*self.ego_fut_steps*2))\n        self.ego_fut_decoder = nn.Sequential(*ego_fut_decoder)\n\n\n        self.query_embedding = nn.Sequential(\n            nn.Linear(self.embed_dims, self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n\n        self.motion_query_mlp = nn.Sequential(\n            nn.Linear(2 * self.embed_dims * self.num_fut_mode , self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n\n        self.query_feat_embedding = nn.Embedding(1, self.embed_dims)\n        self.memory_ego_embed = None\n        self.time_embedding = nn.Embedding(self.memory_len, self.embed_dims)\n        self.hist_ego_mlp = nn.Sequential(\n            nn.Linear(self.embed_dims * 2, self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n\n        traj_branch = []\n        for _ in range(self.num_reg_fcs):\n            traj_branch.append(Linear(self.embed_dims*2, self.embed_dims*2))\n            traj_branch.append(nn.ReLU())\n        traj_branch.append(Linear(self.embed_dims*2, self.fut_steps*self.code_size))\n        traj_branch = nn.Sequential(*traj_branch)\n\n        traj_cls_branch = []\n        for _ in range(self.num_reg_fcs):\n            traj_cls_branch.append(Linear(self.embed_dims*2, self.embed_dims*2))\n            traj_cls_branch.append(nn.LayerNorm(self.embed_dims*2))\n            traj_cls_branch.append(nn.ReLU(inplace=True))\n        traj_cls_branch.append(Linear(self.embed_dims*2, self.traj_num_cls))\n        traj_cls_branch = nn.Sequential(*traj_cls_branch)\n\n        def _get_clones(module, N):\n            return nn.ModuleList([copy.deepcopy(module) for i in range(N)])\n\n        motion_num_pred = 2\n        self.traj_branches = _get_clones(traj_branch, motion_num_pred)\n        self.traj_cls_branches = _get_clones(traj_cls_branch, motion_num_pred)\n\n        # self.reference_points = nn.Embedding(self.num_query, 3)\n        self.agent_info = MLN(17)\n        self.agent_info_embedding = nn.Sequential(\n            nn.Linear(17, self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n\n        self.traj_mode_embedding = nn.Embedding(self.num_fut_mode, self.embed_dims)\n\n    def pre_update_memory(self, data, fut_traj_from_velo):\n\n        x = 1-data['start_of_sequence'] # original prev_exist, so we need do `not`\n        B = x.size(0)\n        # refresh the memory when the scene changes\n        if self.memory_traj is None:\n            self.memory_traj =  fut_traj_from_velo.unsqueeze(1).repeat(1, self.memory_len, 1, 1) # * 0\n            self.memory_ego_embed = x.new_zeros(B, self.memory_len, self.embed_dims * 2)\n        else:\n            self.memory_traj = transform_reference_points(self.memory_traj, data['ego_pose_inv'], reverse=False)[..., :2]\n            self.memory_traj = memory_refresh(self.memory_traj[:, :self.memory_len], x) \n            for i in range(B):\n                # do not leak velo info，init all zeros\n                if not x[i]: self.memory_traj[i, 0] = fut_traj_from_velo[i] * 0\n            \n            self.memory_ego_embed = memory_refresh(self.memory_ego_embed[:, :self.memory_len], x)\n\n    def post_update_memory(self, data, ego_fut_trajs, ego_embeds):\n        self.memory_traj = torch.cat([ego_fut_trajs, self.memory_traj], 
dim=1)\n        self.memory_traj = torch.cat([self.memory_traj, torch.zeros_like(self.memory_traj[..., :1])], -1)\n        self.memory_traj = transform_reference_points(self.memory_traj, data['ego_pose'], reverse=False)\n        self.memory_ego_embed = torch.cat([ego_embeds, self.memory_ego_embed], dim=1).detach()\n    \n\n    def forward(self, \n            agent_instances,\n            preds_map_dicts,\n            img_metas=None,\n            gt_ego_lcf_feat=None,\n            gt_ego_fut_cmd=None,\n            gt_ego_his_traj=None,\n            gt_ego_fut_trajs=None):\n        \n        valid_length = [(matched_gt_idxes>=0).sum() for matched_gt_idxes in agent_instances.matched_gt_idxes]\n\n        max_valid_query = max(valid_length)\n        \n        assert 0<=max_valid_query<=250\n        agent_instances = agent_instances[:, :max_valid_query]\n        agent_queries = agent_instances.query_feats\n        agent_reference_points = agent_instances.reference_points\n        mode_embedding = self.traj_mode_embedding.weight\n        hist_mask = agent_instances.hist_mask\n        B = len(agent_instances)\n        hist_xyz_delta = (agent_instances.hist_xyz[:, :, 1:] - agent_instances.hist_xyz[:, :, :-1]) * hist_mask[:,:, :-1, None]\n        agent_hist_info = torch.cat([hist_xyz_delta.flatten(-2, -1), agent_instances.hist_velo.flatten(-2, -1)], -1).detach()\n        \n        # I do believe this agent history infomation can be helpfull, so I use it twice\n        agent_queries = self.agent_info(agent_queries, agent_hist_info)\n        extra_agent_infos = (self.agent_info_embedding(agent_hist_info)[:, :, None, :].repeat(1, 1, self.num_fut_mode, 1)).flatten(1, 2)\n\n        agent_queries = (agent_queries[:, :, None, :] + mode_embedding[None, None, :, :]).flatten(1, 2)\n\n        hist_traj_points = agent_instances.hist_xyz.unsqueeze(2).repeat(1, 1, self.num_fut_mode, 1, 1).flatten(1, 2)\n        hist_agent_xy = agent_instances.reference_points[:, :, :2].unsqueeze(2).repeat(1, 1, self.num_fut_mode, 1).flatten(1, 2)\n\n        agent_queries = self.agent_decoder(agent_queries, reference_points_q=hist_traj_points, reference_points_v=hist_traj_points, pc_range=self.pc_range)\n        \n        map_queries = preds_map_dicts['queries'].clone()\n        map_lines = preds_map_dicts['lines'].clone()\n        map_scores = preds_map_dicts['scores'].clone()\n        B, NMQ, K2 = map_lines.shape\n        map_lines = map_lines.reshape(B, NMQ, K2//2, 2)\n        map_pos = self.query_embedding(bevpos2posemb(map_lines.mean(-2)))\n        map_lines = get_ego_pos(map_lines, self.pc_range)\n\n        co_agent_queries = torch.cat([agent_queries, extra_agent_infos], -1)\n        pred_traj_cls = self.traj_cls_branches[0](co_agent_queries).view(B, max_valid_query, self.num_fut_mode)\n        pred_traj_cls = self.log_softmax(pred_traj_cls)\n        pred_traj = self.traj_branches[0](co_agent_queries)\n        B, N, PK = pred_traj.shape\n        pred_traj = pred_traj.view(B, N, PK//self.code_size, self.code_size)\n\n        fut_traj_points = torch.cat([hist_agent_xy.unsqueeze(-2), pred_traj[..., :2]], -2)\n        fut_traj_points = torch.cumsum(fut_traj_points, -2)[:, :, 1:]\n\n        agent_queries = self.agent_map_decoder(agent_queries, map_queries, map_queries, reference_points_q=fut_traj_points, reference_points_v=map_lines, pc_range=self.pc_range, map_scores=map_scores)\n        co_agent_queries = torch.cat([agent_queries, extra_agent_infos], -1)\n        pred_opt_traj_cls = 
self.traj_cls_branches[1](co_agent_queries).view(B, max_valid_query, self.num_fut_mode)\n        pred_opt_traj_cls = self.log_softmax(pred_opt_traj_cls)\n        pred_opt_traj = self.traj_branches[1](co_agent_queries)\n\n        pred_opt_traj = pred_opt_traj.view(B, N, PK//self.code_size, self.code_size)\n\n        fut_opt_traj_points = torch.cat([hist_agent_xy.unsqueeze(-2), pred_opt_traj[..., :2]], -2)\n        fut_opt_traj_points = torch.cumsum(fut_opt_traj_points, -2)[:, :, 1:]\n\n\n        # planner\n        bs, num_agents = B, N//self.num_fut_mode\n        agent_queries = self.motion_query_mlp(co_agent_queries.view(bs, num_agents, 2 * self.embed_dims * self.num_fut_mode))\n        agent_reference_points = fut_opt_traj_points.view(bs, num_agents, self.num_fut_mode, 8, 2).mean(2)\n        agent_centers = get_rel_pos(agent_reference_points[:, :, 0], self.pc_range)\n        agent_pos = self.query_embedding(bevpos2posemb(agent_centers))\n\n        gt_ego_lcf_feat = torch.stack(gt_ego_lcf_feat).to(agent_queries.device)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd).to(agent_queries.device)\n\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(agent_queries.device)\n\n        timestamp = torch.FloatTensor([\n            single_img_metas['timestamp'] \n            for single_img_metas in img_metas]).to(agent_queries.device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(agent_queries.device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(agent_queries.device)\n\n        data = dict(\n            start_of_sequence = start_of_sequence,\n            timestamp = timestamp,\n            ego_pose_inv = ego_pose_inv,\n            ego_pose = ego_pose,\n        )\n        fut_traj_from_velo = gt_ego_lcf_feat[:, :2].unsqueeze(1).repeat(1, self.ego_fut_steps, 1) * torch.arange(1, self.ego_fut_steps+1)[None,:, None].to(agent_queries.device) * 0.5\n\n        self.pre_update_memory(data, fut_traj_from_velo)\n\n        ego_query = self.query_feat_embedding.weight.repeat(bs, 1)\n        ego_query = self.ego_info(ego_query, gt_ego_fut_cmd.to(ego_query.dtype)).unsqueeze(1)\n\n        ego_pos = get_rel_pos(ego_query.new_zeros(bs, 2), self.pc_range)\n        ego_pos = self.query_embedding(bevpos2posemb(ego_pos)).unsqueeze(1)\n        init_ego_traj =  self.memory_traj[:, 0:1]\n\n        hist_ego_query = self.hist_ego_mlp(self.memory_ego_embed) + self.time_embedding.weight[None]\n        ego_query = self.ego_ego_decoder(\n                query=ego_query,\n                key=hist_ego_query,\n                value=hist_ego_query,\n        )\n        ego_agent_query = self.ego_agent_decoder(query=ego_query,\n                key=agent_queries,\n                value=agent_queries,\n                query_pos=ego_pos,\n                key_pos=agent_pos,\n                reference_points_q=init_ego_traj,\n                reference_points_v=agent_reference_points)\n\n        ego_map_query = self.ego_map_decoder(query=ego_query,\n                key=map_queries,\n                value=map_queries,\n                query_pos=ego_pos,\n                key_pos=map_pos,\n                reference_points_q=init_ego_traj,\n                reference_points_v=map_lines,\n                map_scores=map_scores,\n                )\n        
co_agent_query = torch.cat([ego_agent_query, ego_map_query], -1)\n        outputs_ego_trajs = self.ego_fut_decoder(co_agent_query)\n\n        outputs_ego_trajs = outputs_ego_trajs.reshape(outputs_ego_trajs.shape[0], \n                                                      self.ego_fut_mode, self.ego_fut_steps, 2)\n\n        self.post_update_memory(data, torch.cumsum(outputs_ego_trajs[gt_ego_fut_cmd==1], 1)[:, None], co_agent_query)\n        \n        ego_trajs = torch.cumsum(outputs_ego_trajs[gt_ego_fut_cmd==1], 1)\n        ego_trajs = torch.cat([torch.zeros_like(ego_trajs[:,:1]), ego_trajs], 1)\n        ego_trajs = torch.cat([ego_trajs, torch.zeros_like(ego_trajs[..., :1])], -1)\n        ego_trajs_in_global = transform_reference_points(ego_trajs, data['ego_pose'], reverse=False)[..., :2]\n\n        fut_trajs_in_global = torch.cat([fut_opt_traj_points, torch.zeros_like(fut_opt_traj_points[..., :1])], -1)\n        fut_trajs_in_global = transform_reference_points(fut_trajs_in_global, data['ego_pose'], reverse=False)[..., :2]\n\n        return dict(\n            pred_trajs=[\n                dict(\n                    pred_traj=pred_traj.view(B, N//self.num_fut_mode, self.num_fut_mode,  PK//self.code_size, self.code_size),\n                    pred_traj_cls=pred_traj_cls,\n                    valid_length=valid_length,\n                ),\n                dict(\n                    pred_traj=pred_opt_traj.view(B, N//self.num_fut_mode, self.num_fut_mode, PK//self.code_size, self.code_size),\n                    pred_traj_cls=pred_opt_traj_cls,\n                    valid_length=valid_length,\n            )],\n            fut_traj_from_velo = fut_traj_from_velo,\n            fut_trajs_in_global = fut_trajs_in_global,\n            pred_abs_trajs2 = fut_opt_traj_points,\n            obj_idxes = agent_instances.obj_idxes.clone(),\n            agent_scores = agent_instances.scores.clone(),\n            ego_fut_preds=outputs_ego_trajs,\n            ego_trajs_in_global = ego_trajs_in_global,\n            )\n        \n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self,\n             gt_agent_fut_traj=None,\n             gt_agent_fut_traj_mask=None,\n             gt_ego_fut_trajs=None,\n             gt_ego_fut_cmd=None,\n             gt_ego_fut_masks=None,\n             preds_dicts=None,\n             preds_map_dicts=None,\n             matched_gt_idxes=None,\n             img_metas=None,\n            ):\n\n        loss_dict = dict()\n        gt_agent_fut_traj_list = []\n        gt_agent_fut_traj_mask_list = []\n        B = len(gt_agent_fut_traj)\n        pred_trajs = preds_dicts['pred_trajs']\n        valid_length = pred_trajs[0]['valid_length']\n\n        for i in range(B):\n            index = matched_gt_idxes[i][:valid_length[i]]\n            if valid_length[i]>0:\n                gt_agent_fut_traj_list.append(gt_agent_fut_traj[i][:valid_length[i]][index])\n                gt_agent_fut_traj_mask_list.append(gt_agent_fut_traj_mask[i][:valid_length[i]][index])\n\n        gt_agent_fut_traj = torch.cat(gt_agent_fut_traj_list)\n        gt_agent_fut_traj_mask = torch.cat(gt_agent_fut_traj_mask_list).sum(-1) > 0\n\n        for lld, single_preds in  enumerate(pred_trajs):\n            pred_traj = single_preds['pred_traj']\n            pred_traj_cls = single_preds['pred_traj_cls']\n            pred_agent_fut_traj_list = []\n            pred_agent_fut_traj_cls_list = []\n\n            for i in range(B):\n                if valid_length[i]>0:\n                    
pred_agent_fut_traj_list.append(pred_traj[i][:valid_length[i]])\n                    pred_agent_fut_traj_cls_list.append(pred_traj_cls[i][:valid_length[i]])\n\n            pred_traj = torch.cat(pred_agent_fut_traj_list)\n            pred_traj_cls = torch.cat(pred_agent_fut_traj_cls_list)\n            loss_traj, l_class, l_reg, l_minade, l_minfde, l_mr = self.loss_traj(pred_traj_cls, pred_traj, gt_agent_fut_traj, gt_agent_fut_traj_mask)\n            loss_dict.update({\n                    f'loss_traj.d{lld}': loss_traj,\n                    f'l_class.d{lld}': l_class,\n                    f'l_reg.d{lld}': l_reg,\n                    f'l_minade.d{lld}': l_minade,\n                    f'l_minfde.d{lld}': l_minfde,\n                    f'l_mr.d{lld}': l_mr,\n                }\n            )\n\n        ego_fut_preds = preds_dicts['ego_fut_preds']\n        map_lines = preds_map_dicts['lines']\n        B, NMQ, K2 = map_lines.shape\n        map_lines = map_lines.reshape(B, NMQ, K2//2, 2)\n        map_scores = preds_map_dicts['scores']\n\n        agent_fut_preds = preds_dicts['pred_abs_trajs2'].reshape(B, -1, self.num_fut_mode, 8, 2)[..., :self.ego_fut_steps, :2]\n        agent_score_preds = preds_dicts['agent_scores']\n        agent_fut_cls_preds = preds_dicts['pred_trajs'][-1]['pred_traj_cls']\n        gt_ego_fut_trajs = torch.stack(gt_ego_fut_trajs)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd)\n        gt_ego_fut_masks = torch.stack(gt_ego_fut_masks)\n        gt_ego_fut_trajs = torch.cat([gt_ego_fut_trajs[:,:1], (gt_ego_fut_trajs[:,1:] - gt_ego_fut_trajs[:,:-1])], 1)\n        gt_ego_fut_trajs = gt_ego_fut_trajs.unsqueeze(1).repeat(1, self.ego_fut_mode, 1, 1)\n\n        loss_plan_l1_weight = gt_ego_fut_cmd[..., None, None] * gt_ego_fut_masks[:, None, :, None]\n        loss_plan_l1_weight = loss_plan_l1_weight.repeat(1, 1, 1, 2)\n        \n        loss_plan_l1 = self.loss_plan_reg(\n            ego_fut_preds,\n            gt_ego_fut_trajs,\n            loss_plan_l1_weight\n        )\n\n        loss_plan_bound = self.loss_plan_bound(\n            ego_fut_preds[gt_ego_fut_cmd==1],\n            map_lines,\n            map_scores,\n            weight=gt_ego_fut_masks\n        )\n\n        loss_plan_col = self.loss_plan_col(\n            ego_fut_preds[gt_ego_fut_cmd==1],\n            agent_fut_preds,\n            agent_score_preds.squeeze(-1),\n            agent_fut_cls_preds,\n            weight=gt_ego_fut_masks[:, :, None].repeat(1, 1, 2)\n        )\n\n        loss_plan_dir = self.loss_plan_dir(\n            ego_fut_preds[gt_ego_fut_cmd==1],\n            map_lines,\n            map_scores,\n            weight=gt_ego_fut_masks\n        )\n\n        loss_plan_l1 = torch.nan_to_num(loss_plan_l1)\n        loss_plan_bound = torch.nan_to_num(loss_plan_bound)\n        loss_plan_col = torch.nan_to_num(loss_plan_col)\n        loss_plan_dir = torch.nan_to_num(loss_plan_dir)\n        \n        loss_dict['loss_plan_reg'] = loss_plan_l1\n        loss_dict['loss_plan_bound'] = loss_plan_bound\n        loss_dict['loss_plan_col'] = loss_plan_col\n        loss_dict['loss_plan_dir'] = loss_plan_dir\n\n        return loss_dict\n\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_motion(self, preds_dicts, img_metas,  rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores 
and labels after nms.\n        \"\"\"\n        num_samples = len(img_metas)\n\n        # fut_traj_points = preds_dicts['pred_abs_trajs'].view(num_samples, -1, self.num_fut_mode,  self.fut_steps, self.code_size)\n        # # fut_traj_index = preds_dicts['pred_trajs'][0]['pred_traj_cls'].softmax(-1).argmax(-1)\n        # scores, fut_traj_index = torch.max(preds_dicts['pred_trajs'][0]['pred_traj_cls'].softmax(-1), dim=-1)\n        # inds_rep = fut_traj_index.repeat(\n        #     self.fut_steps, self.code_size, 1, 1).permute(2, 3, 0, 1)\n        # fut_traj_points = fut_traj_points.gather(2, inds_rep.unsqueeze(2)).squeeze(dim=2)\n\n        fut_traj_points2 = preds_dicts['pred_abs_trajs2'].view(num_samples, -1, self.num_fut_mode,  self.fut_steps, self.code_size)\n        fut_trajs_in_global = preds_dicts['fut_trajs_in_global'].view(num_samples, -1, self.num_fut_mode,  self.fut_steps, self.code_size)\n        # fut_traj_index = preds_dicts['pred_trajs'][0]['pred_traj_cls'].softmax(-1).argmax(-1)\n        # scores, fut_traj_index = torch.max(preds_dicts['pred_trajs'][1]['pred_traj_cls'].softmax(-1), dim=-1)\n        # inds_rep = fut_traj_index.repeat(\n        #    self.fut_steps, self.code_size, 1, 1).permute(2, 3, 0, 1)\n        # fut_traj_points2 = fut_traj_points2.gather(2, inds_rep.unsqueeze(2)).squeeze(dim=2)\n\n        ret_list = [] \n        for i in range(num_samples):\n            ret_list.append(\n                dict(\n                    # pred_agent_fut_trajs = fut_traj_points[i].cpu().numpy(),\n                    fut_trajs_in_global = fut_trajs_in_global[i].cpu().numpy(),\n                    pred_agent_fut_trajs2 = fut_traj_points2[i].cpu().numpy(),\n                    pred_traj_cls = preds_dicts['pred_trajs'][1]['pred_traj_cls'][i].softmax(-1).cpu().numpy(),\n                    pred_traj = preds_dicts['pred_trajs'][1]['pred_traj'][i].cpu().numpy(),\n                    obj_idxes = preds_dicts['obj_idxes'][i].cpu().numpy()\n                    )\n                )\n        return ret_list\n\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_traj(self, preds_dicts, img_metas,  rescale=False, gt_ego_fut_trajs=None, gt_ego_fut_cmd=None, gt_ego_fut_masks=None, gt_fut_segmentations=None, vad_ego_fut_trajs=None):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        pred_ego_fut_trajs = preds_dicts['ego_fut_preds']\n\n        gt_ego_fut_trajs = torch.stack(gt_ego_fut_trajs).to(pred_ego_fut_trajs.device)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd).to(pred_ego_fut_trajs.device)\n        gt_ego_fut_masks = torch.stack(gt_ego_fut_masks).to(pred_ego_fut_trajs.device)\n\n        pred_ego_fut_trajs = torch.cumsum(pred_ego_fut_trajs[gt_ego_fut_cmd==1], 1)\n        # pred_ego_fut_trajs = vad_ego_fut_trajs[0][None]\n\n        metric_dict = {\n            'plan_L2_1s':0,\n            'plan_L2_2s':0,\n            'plan_L2_3s':0,\n            'plan_obj_col_1s':0,\n            'plan_obj_col_2s':0,\n            'plan_obj_col_3s':0,\n            'plan_obj_box_col_1s':0,\n            'plan_obj_box_col_2s':0,\n            'plan_obj_box_col_3s':0,\n            'l2_dist': 0,\n        }\n        \n\n        fut_valid_flag = gt_ego_fut_masks.all()\n        future_second = 3\n        metric_dict['fut_valid_flag'] = 
fut_valid_flag.cpu().item()\n        for i in range(future_second):\n            if fut_valid_flag:\n                cur_time = (i+1)*2\n                traj_L2 = self.planning_metric.compute_L2(\n                    pred_ego_fut_trajs[0, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[0, :cur_time]\n                )\n\n                obj_coll, obj_box_coll = self.planning_metric.evaluate_coll(\n                    pred_ego_fut_trajs[:, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[:, :cur_time],\n                    gt_fut_segmentations,\n                    index = [each['index'] for each in img_metas]\n                )\n                metric_dict['plan_L2_{}s'.format(i+1)] = traj_L2\n                metric_dict['plan_obj_col_{}s'.format(i+1)] = obj_coll.mean().item()\n                metric_dict['plan_obj_box_col_{}s'.format(i+1)] = obj_box_coll.max().item()\n        l2_dist = (pred_ego_fut_trajs-gt_ego_fut_trajs).norm(dim=-1) * gt_ego_fut_masks[:, None]\n\n        l2_dist[gt_ego_fut_masks[:, None]==0] = -1\n        metric_dict['l2_dist'] = l2_dist[0].cpu()\n        ret_list = []\n        num_samples = len(pred_ego_fut_trajs)\n        assert num_samples == 1\n        \n        index_w_scene = img_metas[0]['scene_name'] + '-' + str(img_metas[0]['index'])\n\n        for i in range(num_samples):\n            ret_list.append(\n                dict(\n                    pred_ego_fut_trajs = pred_ego_fut_trajs[i].cpu(),\n                    gt_ego_fut_trajs = gt_ego_fut_trajs[i].cpu(),\n                    metric_dict = metric_dict,\n                    l2_dist=l2_dist[i].cpu(),\n                    index_w_scene = index_w_scene,\n                    ego_trajs_in_global = preds_dicts['ego_trajs_in_global'][i].cpu(),\n                    gt_ego_fut_cmd = gt_ego_fut_cmd[i].cpu(),\n                    )\n                )\n        return ret_list\n\nclass MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256, use_ln=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.use_ln = use_ln\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        if self.use_ln:\n            self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.init_weight()\n\n    def init_weight(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        if self.use_ln:\n            x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out"
  },
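  {
    "path": "docs/examples/mln_conditioning_sketch.py",
    "content": "# Illustrative sketch, NOT part of the original repo (hypothetical example file).\n# It restates the MLN module defined at the end of the motion/planning head above:\n# a feature x is conditioned on a latent code c via LayerNorm followed by a learned\n# scale/shift, out = gamma(reduce(c)) * LN(x) + beta(reduce(c)). With the zero/one\n# initialisation used there, MLN behaves exactly like a plain LayerNorm at init.\nimport torch\nimport torch.nn as nn\n\n\nclass TinyMLN(nn.Module):\n    def __init__(self, c_dim, f_dim=256):\n        super().__init__()\n        self.reduce = nn.Sequential(nn.Linear(c_dim, f_dim), nn.ReLU())\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        c = self.reduce(c)\n        return self.gamma(c) * self.ln(x) + self.beta(c)\n\n\nif __name__ == '__main__':\n    x = torch.randn(2, 256)  # e.g. an ego query feature\n    c = torch.randn(2, 3)    # e.g. the 3-dim driving command fed to self.ego_info\n    out = TinyMLN(c_dim=3)(x, c)\n    # At initialisation the modulation is identity, so MLN(x, c) equals LayerNorm(x).\n    print(torch.allclose(out, nn.LayerNorm(256, elementwise_affine=False)(x), atol=1e-6))\n"
  },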
  {
    "path": "mmdet3d/models/fbbev/motion_head/traj_loss.py",
    "content": "#---------------------------------------------------------------------------------#\n# UniAD: Planning-oriented Autonomous Driving (https://arxiv.org/abs/2212.10156)  #\n# Source code: https://github.com/OpenDriveLab/UniAD                              #\n# Copyright (c) OpenDriveLab. All rights reserved.                                #\n#---------------------------------------------------------------------------------#\n\nimport torch\nimport math\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom typing import Tuple\n\nfrom mmdet.models import LOSSES\n\n@LOSSES.register_module()\nclass TrajLoss(nn.Module):\n    \"\"\"\n    MTP loss modified to include variances. Uses MSE for mode selection.\n    Can also be used with\n    Multipath outputs, with residuals added to anchors.\n    \"\"\"\n\n    def __init__(self, use_variance=False, cls_loss_weight=1., nll_loss_weight=1., loss_weight_minade=0., loss_weight_minfde=1., loss_weight_mr=1.):\n        \"\"\"\n        Initialize MTP loss\n        :param args: Dictionary with the following (optional) keys\n            use_variance: bool, whether or not to use variances for computing\n            regression component of loss,\n                default: False\n            alpha: float, relative weight assigned to classification component,\n            compared to regression component\n                of loss, default: 1\n        \"\"\"\n        super(TrajLoss, self).__init__()\n        self.use_variance = use_variance\n        self.cls_loss_weight = cls_loss_weight\n        self.nll_loss_weight = nll_loss_weight\n        self.loss_weight_minade = loss_weight_minade\n        self.loss_weight_minfde = loss_weight_minfde\n\n    def forward(self,\n                traj_prob, \n                traj_preds, \n                gt_future_traj, \n                gt_future_traj_valid_mask):\n        \"\"\"\n        Compute MTP loss\n        :param predictions: Dictionary with 'traj': predicted trajectories\n        and 'probs': mode (log) probabilities\n        :param ground_truth: Either a tensor with ground truth trajectories\n        or a dictionary\n        :return:\n        \"\"\"\n        # Unpack arguments\n        traj = traj_preds # (b, nmodes, seq, 5)\n        log_probs = traj_prob\n        traj_gt = gt_future_traj\n\n        # Useful variables\n        batch_size = traj.shape[0]\n        sequence_length = traj.shape[2]\n        pred_params = 5 if self.use_variance else 2\n\n        # Masks for variable length ground truth trajectories\n        masks = 1 - gt_future_traj_valid_mask.to(traj.dtype)\n\n        l_minfde, inds = min_fde(traj, traj_gt, masks)\n        try:\n            l_mr = miss_rate(traj, traj_gt, masks)\n        except:\n            l_mr = torch.zeros_like(l_minfde)\n        l_minade, inds = min_ade(traj, traj_gt, masks)\n        inds_rep = inds.repeat(\n            sequence_length,\n            pred_params, 1, 1).permute(3, 2, 0, 1)\n\n        # Calculate MSE or NLL loss for trajectories corresponding to selected\n        # outputs:\n        traj_best = traj.gather(1, inds_rep).squeeze(dim=1)\n\n        if self.use_variance:\n            l_reg = traj_nll(traj_best, traj_gt, masks)\n        else:\n            l_reg = l_minade\n\n        # Compute classification loss\n        l_class = - torch.squeeze(log_probs.gather(1, inds.unsqueeze(1)))\n\n        l_reg = torch.sum(l_reg)/(batch_size + 1e-5) \n        l_class = torch.sum(l_class)/(batch_size + 1e-5)\n        l_minade = torch.sum(l_minade)/(batch_size + 1e-5) 
\n        l_minfde = torch.sum(l_minfde)/(batch_size + 1e-5) \n\n        loss = l_class * self.cls_loss_weight + l_reg * self.nll_loss_weight + l_minade * self.loss_weight_minade + l_minfde * self.loss_weight_minfde\n        return loss, l_class, l_reg, l_minade, l_minfde, l_mr\n\ndef min_ade(traj: torch.Tensor, traj_gt: torch.Tensor,\n            masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n    \"\"\"\n    Computes average displacement error for the best trajectory is a set,\n    with respect to ground truth\n    :param traj: predictions, shape [batch_size, num_modes, sequence_length, 2]\n    :param traj_gt: ground truth trajectory, shape\n    [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth, shape\n    [batch_size, sequence_length]\n    :return errs, inds: errors and indices for modes with min error, shape\n    [batch_size]\n    \"\"\"\n    num_modes = traj.shape[1]\n    traj_gt_rpt = traj_gt.unsqueeze(1).repeat(1, num_modes, 1, 1)\n    masks_rpt = masks.unsqueeze(1).repeat(1, num_modes, 1)\n    err = traj_gt_rpt - traj[:, :, :, 0:2]\n    err = torch.pow(err, exponent=2)\n    err = torch.sum(err, dim=3)\n    err = torch.pow(err, exponent=0.5)\n    err = torch.sum(err * (1 - masks_rpt), dim=2) / \\\n        torch.clip(torch.sum((1 - masks_rpt), dim=2), min=1)\n    err, inds = torch.min(err, dim=1)\n\n    return err, inds\n\ndef traj_nll(\n        pred_dist: torch.Tensor,\n        traj_gt: torch.Tensor,\n        masks: torch.Tensor):\n    \"\"\"\n    Computes negative log likelihood of ground truth trajectory under a\n    predictive distribution with a single mode,\n    with a bivariate Gaussian distribution predicted at each time in the\n    prediction horizon\n\n    :param pred_dist: parameters of a bivariate Gaussian distribution,\n    shape [batch_size, sequence_length, 5]\n    :param traj_gt: ground truth trajectory,\n    shape [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth,\n    shape [batch_size, sequence_length]\n    :return:\n    \"\"\"\n    mu_x = pred_dist[:, :, 0]\n    mu_y = pred_dist[:, :, 1]\n    x = traj_gt[:, :, 0]\n    y = traj_gt[:, :, 1]\n\n    sig_x = pred_dist[:, :, 2]\n    sig_y = pred_dist[:, :, 3]\n    rho = pred_dist[:, :, 4]\n    ohr = torch.pow(1 - torch.pow(rho, 2), -0.5)\n\n    nll = 0.5 * torch.pow(ohr, 2) * \\\n        (torch.pow(sig_x, 2) * torch.pow(x - mu_x, 2) + torch.pow(sig_y, 2) *\n         torch.pow(y - mu_y, 2) - 2 * rho * torch.pow(sig_x, 1) *\n         torch.pow(sig_y, 1) * (x - mu_x) * (y - mu_y)) - \\\n        torch.log(sig_x * sig_y * ohr) + 1.8379\n\n    nll[nll.isnan()] = 0\n    nll[nll.isinf()] = 0\n\n    nll = torch.sum(nll * (1 - masks), dim=1) / (torch.sum((1 - masks), dim=1) + 1e-5)\n    # Note: Normalizing with torch.sum((1 - masks), dim=1) makes values\n    # somewhat comparable for trajectories of\n    # different lengths\n\n    return nll\n\ndef min_fde(traj: torch.Tensor, traj_gt: torch.Tensor,\n            masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n    \"\"\"\n    Computes final displacement error for the best trajectory is a set,\n    with respect to ground truth\n    :param traj: predictions, shape [batch_size, num_modes, sequence_length, 2]\n    :param traj_gt: ground truth trajectory, shape\n    [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth, shape\n    [batch_size, sequence_length]\n    :return errs, inds: errors and indices for modes with min error,\n    shape 
[batch_size]\n    \"\"\"\n    num_modes = traj.shape[1]\n    lengths = torch.sum(1 - masks, dim=1).long()\n    valid_mask = lengths > 0\n    traj = traj[valid_mask]\n    traj_gt = traj_gt[valid_mask]\n    masks = masks[valid_mask]\n    traj_gt_rpt = traj_gt.unsqueeze(1).repeat(1, num_modes, 1, 1)\n    lengths = torch.sum(1 - masks, dim=1).long()\n    inds = lengths.unsqueeze(1).unsqueeze(\n        2).unsqueeze(3).repeat(1, num_modes, 1, 2) - 1\n\n    traj_last = torch.gather(traj[..., :2], dim=2, index=inds).squeeze(2)\n    traj_gt_last = torch.gather(traj_gt_rpt, dim=2, index=inds).squeeze(2)\n\n    err = traj_gt_last - traj_last[..., 0:2]\n    err = torch.pow(err, exponent=2)\n    err = torch.sum(err, dim=2)\n    err = torch.pow(err, exponent=0.5)\n    err, inds = torch.min(err, dim=1)\n\n    return err, inds\n\n\ndef miss_rate(\n        traj: torch.Tensor,\n        traj_gt: torch.Tensor,\n        masks: torch.Tensor,\n        dist_thresh: float = 2) -> torch.Tensor:\n    \"\"\"\n    Computes miss rate for mini batch of trajectories,\n    with respect to ground truth and given distance threshold\n    :param traj: predictions, shape [batch_size, num_modes, sequence_length, 2]\n    :param traj_gt: ground truth trajectory,\n    shape [batch_size, sequence_length, 2]\n    :param masks: masks for varying length ground truth,\n    shape [batch_size, sequence_length]\n    :param dist_thresh: distance threshold for computing miss rate.\n    :return errs, inds: errors and indices for modes with min error,\n    shape [batch_size]\n    \"\"\"\n    num_modes = traj.shape[1]\n\n    traj_gt_rpt = traj_gt.unsqueeze(1).repeat(1, num_modes, 1, 1)\n    masks_rpt = masks.unsqueeze(1).repeat(1, num_modes, 1)\n    dist = traj_gt_rpt - traj[:, :, :, 0:2]\n    dist = torch.pow(dist, exponent=2)\n    dist = torch.sum(dist, dim=3)\n    dist = torch.pow(dist, exponent=0.5)\n    dist[masks_rpt.bool()] = -math.inf\n    dist, _ = torch.max(dist, dim=2)\n    dist, _ = torch.min(dist, dim=1)\n    m_r = torch.sum(torch.as_tensor(dist > dist_thresh)) / len(dist)\n\n    return m_r\n"
  },
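  {
    "path": "docs/examples/traj_loss_mode_selection_sketch.py",
    "content": "# Illustrative sketch, NOT part of the original repo (hypothetical example file).\n# It shows how TrajLoss in mmdet3d/models/fbbev/motion_head/traj_loss.py selects the\n# best of num_modes trajectories with min_ade/min_fde and turns the winning index into\n# the mode-classification term. masks follow the convention in that file: 0 = valid\n# timestep, 1 = padded timestep.\nimport torch\nfrom mmdet3d.models.fbbev.motion_head.traj_loss import min_ade, min_fde\n\nB, M, T = 2, 6, 8                       # batch, modes, future steps\ntraj = torch.randn(B, M, T, 2)          # multi-modal (x, y) predictions\ntraj_gt = torch.randn(B, T, 2)          # ground-truth future trajectory\nmasks = torch.zeros(B, T)               # every timestep valid\n\nade, best_mode = min_ade(traj, traj_gt, masks)   # ADE of the closest mode, per sample\nfde, _ = min_fde(traj, traj_gt, masks)           # final-displacement error of the closest mode\n\n# Classification term: negative log-probability of the winning mode,\n# mirroring l_class = -log_probs.gather(1, inds.unsqueeze(1)) in TrajLoss.forward.\nlog_probs = torch.log_softmax(torch.randn(B, M), dim=-1)\nl_class = -log_probs.gather(1, best_mode.unsqueeze(1)).squeeze(1)\nprint(ade.shape, fde.shape, l_class.shape)       # all torch.Size([2])\n"
  },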
  {
    "path": "mmdet3d/models/fbbev/planner_head/AD_mlp.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\nimport torch\nimport torch.nn.functional as F\nimport torch.nn as nn\nfrom mmcv.runner import force_fp32\nimport os\nfrom mmdet3d.ops.bev_pool_v2.bev_pool import TRTBEVPoolv2\nfrom mmdet.models import DETECTORS\nfrom mmdet3d.models import builder\nfrom mmdet3d.models.detectors import CenterPoint\nfrom mmdet3d.models.builder import build_head, build_neck\nimport numpy as np\nimport copy \nimport spconv.pytorch as spconv\nfrom tqdm import tqdm \nfrom mmdet3d.models.fbbev.utils import run_time\nimport torch\nfrom torchvision.utils import make_grid\nimport torchvision\nimport matplotlib\nimport matplotlib.pyplot as plt\nimport cv2\nfrom collections import defaultdict\nfrom mmcv.runner import get_dist_info\nfrom mmdet.core import reduce_mean\nimport mmcv\nfrom mmdet3d.datasets.utils import nuscenes_get_rt_matrix\nfrom mmdet3d.core.bbox import box_np_ops # , corner_to_surfaces_3d, points_in_convex_polygon_3d_jit\nimport gc\nfrom typing import Any, Dict, List, Optional, Tuple, Union\n\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\nimport pickle\nimport numpy as np\nimport math\n\nimport copy\nimport math\nfrom mmcv.runner.base_module import BaseModule\nfrom mmdet3d.models.detectors.base import Base3DDetector\n\n\n\nimport torch\nimport torch.nn as nn \nfrom mmcv.cnn import Linear, bias_init_with_prob, Scale\n\nfrom mmcv.runner import force_fp32\nfrom mmdet.core import (build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\nfrom ..streampetr.streampetr_utils import *\nimport copy\nfrom mmdet.models.utils import NormedLinear\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.fbbev.utils import save_tensor\nfrom mmcv.runner.base_module import BaseModule\nfrom mmcv.cnn.bricks.transformer import build_transformer_layer_sequence\nfrom .metric_stp3 import PlanningMetric\n# from memory_profiler import profile\nfrom matplotlib.backends.backend_agg import FigureCanvasAgg\nimport PIL.Image as Image\n\n\n\n\ndef get_ego_pos(points, pc_range):\n    if points.size(-1) == 3:\n        points = points * (pc_range[3:6] - pc_range[0:3]) + pc_range[0:3]\n    elif  points.size(-1) == 2:\n        points = points * (pc_range[3:5] - pc_range[0:2]) + pc_range[0:2]\n    return points\n\ndef get_rel_pos(points, pc_range):\n    if points.size(-1) == 3:\n        return (points - pc_range[0:3]) / (pc_range[3:6] - pc_range[0:3])\n    elif  points.size(-1) == 2:\n        return (points - pc_range[0:2]) / (pc_range[3:5] - pc_range[0:2])\n\n\n@HEADS.register_module()\nclass AD_MLP(Base3DDetector):\n    \"\"\"Implements the DETR transformer head.\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n    Args:\n        num_classes (int): Number of categories excluding the background.\n        in_channels (int): Number of channels in the input feature map.\n        num_query (int): Number of query in Transformer.\n        num_reg_fcs (int, optional): Number of 
fully-connected layers used in\n            `FFN`, which is then used for the regression head. Default 2.\n        transformer (obj:`mmcv.ConfigDict`|dict): Config for transformer.\n            Default: None.\n        sync_cls_avg_factor (bool): Whether to sync the avg_factor of\n            all ranks. Default to False.\n        positional_encoding (obj:`mmcv.ConfigDict`|dict):\n            Config for position encoding.\n        loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the\n            classification loss. Default `CrossEntropyLoss`.\n        loss_bbox (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression loss. Default `L1Loss`.\n        loss_iou (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression iou loss. Default `GIoULoss`.\n        tran_cfg (obj:`mmcv.ConfigDict`|dict): Training config of\n            transformer head.\n        test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of\n            transformer head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n    _version = 2\n\n    def __init__(self,\n                 in_channels=256,\n                 embed_dims=256,\n                 num_query=1,\n                 num_reg_fcs=2,\n                 memory_len=12,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 code_weights=None,\n                 init_cfg=None,\n                 point_cloud_range=None,\n                 loss_plan_reg=dict(type='L1Loss', loss_weight=5.0),\n                **kwargs):\n\n        super().__init__()\n        if 'code_size' in kwargs:\n            self.code_size = kwargs['code_size']\n        else:\n            self.code_size = 2\n\n\n\n        self.num_query = num_query\n        self.in_channels = in_channels\n        self.num_reg_fcs = num_reg_fcs\n        # self.train_cfg = train_cfg\n        # self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.embed_dims = embed_dims\n        self.num_motion_mode = 6\n        self.fut_steps = 6\n        self.memory_len = 6\n        self.ego_fut_mode = 3\n\n       \n        # self.code_weights = nn.Parameter(torch.tensor(\n        #     self.code_weights), requires_grad=False)\n        self.pc_range = nn.Parameter(torch.tensor(\n            point_cloud_range), requires_grad=False)\n\n\n\n        self.loss_plan_reg = build_loss(loss_plan_reg)\n\n        # self.ego_map_decoder = build_transformer_layer_sequence(self.ego_map_decoder)\n        # self.ego_decoder = build_transformer_layer_sequence(ego_agent_decoder)\n        self._init_layers()\n    \n        self.planning_metric = PlanningMetric()\n        self.count = 0\n        # dummy\n        self.history_sweep_time = None\n        self.history_bev = None\n        self.history_bev_before_encoder = None\n        self.history_seq_ids = None\n        self.history_forward_augs = None\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n\n        ego_fut_decoder = []\n        ego_fut_dec_in_dim = self.embed_dims*2\n        for i in range(self.num_reg_fcs):\n            if i == 0:\n                ego_fut_decoder.append(Linear(12, ego_fut_dec_in_dim))\n            else:\n                ego_fut_decoder.append(Linear(ego_fut_dec_in_dim, ego_fut_dec_in_dim))\n            ego_fut_decoder.append(nn.ReLU())\n        ego_fut_decoder.append(Linear(ego_fut_dec_in_dim, self.ego_fut_mode*self.fut_steps*2))\n        self.ego_fut_decoder = 
nn.Sequential(*ego_fut_decoder)\n\n\n\n    def forward_train(self,  img_metas=None, **kwargs):\n        \n        \"\"\"\n        NOTE: if I do not `detach` the tensor but use `clone`, there will be a CPU memory leak. I do not figure it out yet.\n        \"\"\"\n        preds_plan_dicts = self.inner_forward(img_metas, **kwargs)\n        return self.loss(\n            preds_plan_dicts=preds_plan_dicts,\n            img_metas=img_metas,\n            **kwargs\n        )\n\n\n\n    def inner_forward(self,  img_metas=None, **kwargs):\n\n        \"\"\"\n        NOTE: if I do not `detach` the tensor but use `clone`, there will be a CPU memory leak. I do not figure it out yet.\n        \"\"\"\n\n        gt_ego_lcf_feat = torch.stack(kwargs['gt_ego_lcf_feat'], 0)\n        gt_ego_fut_cmd = torch.stack(kwargs['gt_ego_fut_cmd'], 0)\n        # gt_ego_fut_trajs =  torch.stack(kwargs['gt_ego_fut_trajs'], 0)\n        self.ego_fut_steps = 6\n\n        vel = gt_ego_lcf_feat[:, :2].unsqueeze(1).repeat(1, self.ego_fut_steps, 1) # * torch.arange(1, self.ego_fut_steps+1)\n        accelation =  gt_ego_lcf_feat[:, 2:4].unsqueeze(1).repeat(1, self.ego_fut_steps, 1) * torch.arange(1, self.ego_fut_steps+1)[None, :, None].to(vel.device) * 0.5\n        vel = vel # + accelation\n\n        fut_traj_from_velo = torch.cumsum(vel * 0.5, 1)# [0]\n        gt_ego_fut_trajs = kwargs['gt_ego_fut_trajs']# [0]\n        # np.corrco(fut_traj_from_velo.cpu().numpy(), gt_ego_fut_trajs.cpu().numpy())\n\n        input = torch.cat([gt_ego_lcf_feat, gt_ego_fut_cmd], -1)\n        \n        outputs_ego_trajs = self.ego_fut_decoder(input)\n        # reference = inverse_sigmoid(reference_points.clone())\n        outputs_ego_trajs = outputs_ego_trajs.reshape(outputs_ego_trajs.shape[0], \n                                                      self.ego_fut_mode, self.fut_steps, 2)\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(gt_ego_lcf_feat.device)\n\n        timestamp = torch.FloatTensor([\n            single_img_metas['timestamp'] \n            for single_img_metas in img_metas]).to(gt_ego_lcf_feat.device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(gt_ego_lcf_feat.device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(gt_ego_lcf_feat.device)\n\n        data = dict(\n            start_of_sequence = start_of_sequence,\n            timestamp = timestamp,\n            ego_pose_inv = ego_pose_inv,\n            ego_pose = ego_pose,\n        )\n\n        preds_plan_dicts =  dict(\n            # init_traj=reference_points[..., :2],\n            data= data,\n            ego_fut_preds=outputs_ego_trajs,\n            # ego_trajs_in_global = ego_trajs_in_global,\n            fut_traj_from_velo=fut_traj_from_velo\n        )\n        return preds_plan_dicts\n\n    def forward_test(self, **kwargs):\n        for key in ['img_metas', 'gt_ego_lcf_feat', 'gt_ego_fut_cmd', 'gt_ego_fut_trajs', 'gt_ego_fut_masks','gt_fut_segmentations', 'vad_ego_fut_trajs', 'gt_fut_segmentations_plus']:\n            kwargs[key] = kwargs[key][0] \n        \n        # img_metas = img_metas[0]\n        return self.simple_test(**kwargs)\n        \n\n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self,\n             gt_ego_fut_trajs=None,\n             gt_ego_fut_cmd=None,\n             
gt_ego_fut_masks=None,\n             preds_plan_dicts=None,\n             img_metas=None,\n             **kwargs,\n            ):\n        \n        ego_fut_preds = preds_plan_dicts['ego_fut_preds']\n        gt_ego_fut_trajs = torch.stack(gt_ego_fut_trajs)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd)\n        gt_ego_fut_masks = torch.stack(gt_ego_fut_masks)\n        gt_ego_fut_trajs = torch.cat([gt_ego_fut_trajs[:,:1], (gt_ego_fut_trajs[:,1:] - gt_ego_fut_trajs[:,:-1])], 1)\n        gt_ego_fut_trajs = gt_ego_fut_trajs.unsqueeze(1).repeat(1, self.ego_fut_mode, 1, 1)\n\n        loss_plan_l1_weight = gt_ego_fut_cmd[..., None, None] * gt_ego_fut_masks[:, None, :, None]\n        loss_plan_l1_weight = loss_plan_l1_weight.repeat(1, 1, 1, 2)\n        \n        loss_plan_l1 = self.loss_plan_reg(\n            ego_fut_preds,\n            gt_ego_fut_trajs,\n            loss_plan_l1_weight\n        )\n\n        loss_plan_l1 = torch.nan_to_num(loss_plan_l1)\n        loss_plan_dict = dict()\n        loss_plan_dict['loss_plan_reg'] = loss_plan_l1\n\n        return loss_plan_dict\n    def aug_test(self): pass\n\n    @force_fp32(apply_to=('reference_points', 'cam_params'))\n    def point_sampling(self, reference_points, cam_params=None):\n\n        rots, trans, intrins, post_rots, post_trans, bda = cam_params\n        B, N, _ = trans.shape\n        eps = 1e-5\n        ogfH, ogfW = 900, 1600\n        reference_points = reference_points[None, None].repeat(B, N, 1, 1, 1, 1)\n        reference_points = torch.inverse(bda).view(B, 1, 1, 1, 1, 3,\n                          3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\n        reference_points -= trans.view(B, N, 1, 1, 1, 3)\n        combine = rots.matmul(torch.inverse(intrins)).inverse()\n        reference_points_cam = combine.view(B, N, 1, 1, 1, 3, 3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\n        reference_points_cam = torch.cat([reference_points_cam[..., 0:2] / torch.maximum(\n            reference_points_cam[..., 2:3], torch.ones_like(reference_points_cam[..., 2:3])*eps),  reference_points_cam[..., 2:3]], 5\n            )\n        reference_points_cam = post_rots.view(B, N, 1, 1, 1, 3, 3).matmul(reference_points_cam.unsqueeze(-1)).squeeze(-1)\n        reference_points_cam += post_trans.view(B, N, 1, 1, 1, 3) \n        # reference_points_cam[..., 0] /= ogfW\n        # reference_points_cam[..., 1] /= ogfH\n        mask = (reference_points_cam[..., 2:3] > eps)\n        mask = (mask & (reference_points_cam[..., 0:1] > eps) \n                 & (reference_points_cam[..., 0:1] < (1.0-eps) * ogfW) \n                 & (reference_points_cam[..., 1:2] > eps) \n                 & (reference_points_cam[..., 1:2] < (1.0-eps) * ogfH))\n        B, N, H, W, D, _ = reference_points_cam.shape\n        reference_points_cam = reference_points_cam.permute(1, 0, 2, 3, 4, 5).reshape(N, B, H*W, D, 3)\n        mask = mask.permute(1, 0, 2, 3, 4, 5).reshape(N, B, H*W, D, 1).squeeze(-1)\n\n        return reference_points, reference_points_cam[..., :2], mask, reference_points_cam[..., 2:3]\n\n    def simple_test(self, **kwargs):\n        \n        preds_plan_dicts = self.inner_forward(**kwargs)\n        pred_traj = self.get_bboxes(\n            preds_plan_dicts, **kwargs\n        )\n\n        img_metas = kwargs['img_metas']\n        output_list = [dict() for _ in range(len(img_metas))]\n        for i, result_dict in enumerate(output_list):\n            result_dict['pred_ego_traj'] = pred_traj[i]\n            result_dict['index'] = img_metas[i]['index']\n    
    \n        pred_ego_fut_trajs = output_list[0]['pred_ego_traj']['pred_ego_fut_trajs']\n\n\n        if not self.training:\n            pred_ego_fut_trajs_ = torch.cat([pred_ego_fut_trajs.new_zeros(1, 2), pred_ego_fut_trajs], 0)\n            rotate_angle_list=[]\n            rotate_angle = 0\n            for i in range(pred_ego_fut_trajs_.size(0)-1):\n                delta = pred_ego_fut_trajs_[i+1] - pred_ego_fut_trajs_[i]\n                cur_rotate_angle = torch.atan2(*delta[[1, 0]])\n                if delta.norm()<1: cur_rotate_angle = 0\n                rotate_angle = cur_rotate_angle\n                rotate_angle_list.append(rotate_angle)\n            fut_gt_bboxes_3d = kwargs['fut_boxes_in_cur_ego_list'][0][0]\n            rgb_image_list = []\n            rgb_image, front_img = self.visual_sample(output_list,  gt_bboxes_3d_=kwargs['gt_bboxes_3d'][0][0], ego_info=None, \n                cam_params=kwargs['img_inputs'][0][1:],\n                front_img=kwargs['img_inputs'][0][0][0, 1],\n                metric_dict = pred_traj[0]['metric_dict'],\n                **kwargs)\n            print(f'sc_{img_metas[0][\"index\"]}')\n            # mmcv.imwrite(rgb_image, f'sc_{img_metas[0][\"index\"]}.png')\n            # mmcv.mkdir_or_exist(f'vis/{img_metas[0][\"scene_name\"]}/')\n            mmcv.imwrite(front_img, f'vis/go_stright/{img_metas[0][\"scene_name\"]}/{img_metas[0][\"index\"]}.jpg')\n            # for i, gt_bboxes_3d in enumerate(fut_gt_bboxes_3d):\n                \n            #     ego_info = [pred_ego_fut_trajs[i][0].item(), pred_ego_fut_trajs[i][1].item(), 0], [1.85, 4.084, 1], rotate_angle_list[i].item()\n            #     rgb_image = self.visual_sample(output_list,  gt_bboxes_3d_=gt_bboxes_3d, ego_info=ego_info, **kwargs)\n            #     rgb_image_list.append(rgb_image)\n        \n        return output_list\n\n    def extract_feat(self): pass\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_bboxes(self, preds_dicts, img_metas=None,  rescale=False, gt_ego_fut_trajs=None, gt_ego_fut_cmd=None, gt_ego_fut_masks=None,  gt_fut_segmentations_plus=None, gt_fut_segmentations=None, vad_ego_fut_trajs=None, **kwargs):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        pred_ego_fut_trajs = preds_dicts['ego_fut_preds']\n        gt_ego_fut_trajs = torch.stack(gt_ego_fut_trajs).to(pred_ego_fut_trajs.device)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd).to(pred_ego_fut_trajs.device)\n        gt_ego_fut_masks = torch.stack(gt_ego_fut_masks).to(pred_ego_fut_trajs.device)\n\n        pred_ego_fut_trajs = torch.cumsum(pred_ego_fut_trajs[gt_ego_fut_cmd==1], 1)\n        # pred_ego_fut_trajs = vad_ego_fut_trajs[0][None]\n        pred_ego_fut_trajs = preds_dicts['fut_traj_from_velo']\n        ego_trajs = torch.cat([torch.zeros_like(pred_ego_fut_trajs[:,:1]), pred_ego_fut_trajs], 1)\n        ego_trajs = torch.cat([ego_trajs, torch.zeros_like(ego_trajs[..., :1])], -1)\n        ego_trajs_in_global = transform_reference_points(ego_trajs, preds_dicts['data']['ego_pose'], reverse=False)[..., :2]\n\n        # pred_ego_fut_trajs = gt_ego_fut_trajs\n        metric_dict = {\n            'plan_L2_1s':0,\n            'plan_L2_2s':0,\n            'plan_L2_3s':0,\n            'plan_obj_col_1s':0,\n            
'plan_obj_col_2s':0,\n            'plan_obj_col_3s':0,\n            'plan_obj_box_col_1s':0,\n            'plan_obj_box_col_2s':0,\n            'plan_obj_box_col_3s':0,\n            'plan_obj_col_plus_1s':0,\n            'plan_obj_col_plus_2s':0,\n            'plan_obj_col_plus_3s':0,\n            'plan_obj_box_col_plus_1s':0,\n            'plan_obj_box_col_plus_2s':0,\n            'plan_obj_box_col_plus_3s':0,\n            'l2_dist': 0,\n        }\n        \n\n        fut_valid_flag = gt_ego_fut_masks.all()\n        future_second = 3\n        metric_dict['fut_valid_flag'] = fut_valid_flag.cpu().item()\n        for i in range(future_second):\n            if fut_valid_flag:\n                cur_time = (i+1)*2\n                traj_L2 = self.planning_metric.compute_L2(\n                    pred_ego_fut_trajs[0, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[0, :cur_time]\n                )\n\n                obj_coll, obj_box_coll = self.planning_metric.evaluate_coll(\n                    pred_ego_fut_trajs[:, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[:, :cur_time],\n                    gt_fut_segmentations,\n                    index = [each['index'] for each in img_metas],\n                    ignore_gt=False,\n                )\n                metric_dict['plan_L2_{}s'.format(i+1)] = traj_L2\n                metric_dict['plan_obj_col_{}s'.format(i+1)] = obj_coll.mean().item()\n                metric_dict['plan_obj_box_col_{}s'.format(i+1)] = obj_box_coll.max().item()\n        \n        for i in range(future_second):\n            if fut_valid_flag:\n                cur_time = (i+1)*2\n                obj_coll, obj_box_coll = self.planning_metric.evaluate_coll(\n                    pred_ego_fut_trajs[:, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[:, :cur_time],\n                    gt_fut_segmentations_plus,\n                    index = [each['index'] for each in img_metas],\n                    ignore_gt=False,\n                )\n                metric_dict['plan_obj_col_plus_{}s'.format(i+1)] = obj_coll.mean().item()\n                metric_dict['plan_obj_box_col_plus_{}s'.format(i+1)] = obj_box_coll.max().item()\n\n        l2_dist = (pred_ego_fut_trajs-gt_ego_fut_trajs).norm(dim=-1) * gt_ego_fut_masks[:, None]\n\n        l2_dist[gt_ego_fut_masks[:, None]==0] = -1\n        metric_dict['l2_dist'] = l2_dist[0].cpu()\n        ret_list = []\n        num_samples = len(pred_ego_fut_trajs)\n        assert num_samples == 1\n        \n        index_w_scene = img_metas[0]['scene_name'] + '-' + str(img_metas[0]['index'])\n\n        for i in range(num_samples):\n            ret_list.append(\n                dict(\n                    pred_ego_fut_trajs = pred_ego_fut_trajs[i].cpu(),\n                    gt_ego_fut_trajs = gt_ego_fut_trajs[i].cpu(),\n                    metric_dict = metric_dict,\n                    l2_dist=l2_dist[i].cpu(),\n                    index_w_scene = index_w_scene,\n                    ego_trajs_in_global = ego_trajs_in_global[i].cpu(),\n                    gt_ego_fut_cmd = gt_ego_fut_cmd[i].cpu(),\n                    index = img_metas[i]['index']\n                    )\n                )\n        return ret_list\n\n    def world2bev_vis(self, x, y):\n             return int((x + 50) * 5), int((y + 50) * 5)\n\n    def visual_sample(self, results, gt_bboxes_3d_=None, ego_info=None, cam_params=None, front_img=None,\n            metric_dict = 
None,\n            **kwargs):\n\n        import matplotlib.pyplot as plt\n        import random\n        import math\n        import pyquaternion\n        from nuscenes.utils.data_classes import Box as NuScenesBox\n        from mmdet3d.core.bbox import CustomBox\n\n        # nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)\n        # _, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)\n\n        ratio=1\n        # plt.figure(figsize=(10, 10*ratio), dpi=300)\n        fig, axes = plt.subplots(1, 1, figsize=(10, 10*ratio), dpi=300)\n        plt.gca().set_axis_off()\n        plt.axis('off')\n        fig.tight_layout()\n\n        margin=50.0\n        coor_range = self.world2bev_vis(-margin, margin)\n        axes.set_xlim(np.array(coor_range))\n        axes.set_ylim(np.array(coor_range))\n        axes.grid(False)\n        # ax = plt.gca()\n        axes.set_aspect('equal', adjustable='box')  \n        axes.invert_yaxis()\n        random.seed(0)\n        colors = ['#%02X%02X%02X' % (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255) ) for _ in range(40)]\n\n        ego_center =  self.world2bev_vis(0, 0)\n        axes.scatter(ego_center[0], ego_center[1], s=15, marker='o',color='r', zorder=2)\n\n        if gt_bboxes_3d_ is not None:\n            # gt_bboxes_3d = kwargs['gt_bboxes_3d'][0][0]\n            # bev_coor = gt_bboxes_3d.bev.cpu().numpy()\n            # rects = [(tuple(coor[:2]), tuple(coor[2:4]), math.degrees(coor[4])) for coor in bev_coor]\n            # boxes = np.array([cv2.boxPoints(rect) for rect in rects])\n            # raw = gt_bboxes_3d.corners[:, [4, 7, 3, 0], :2]\n            boxes = gt_bboxes_3d_.tensor.numpy().copy()\n            for i, box in enumerate(boxes):\n                center = box[:3]\n                wlh = box[[4, 3, 5]]\n                box_yaw = box[6]\n                box_vel = box[7:].tolist()\n                box_vel.append(0)\n                quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw)\n                center[:2]=np.array(self.world2bev_vis(center[0],center[1]))\n                wlh[0]=wlh[0]*5\n                wlh[1]=wlh[1]*5\n                nusc_box = CustomBox(center, wlh, quat, velocity=box_vel)\n                c = colors[i % len(colors)]\n                nusc_box.render(axes, view=np.eye(4), colors=(c, c, c), linewidth=1)\n            # if ego_info is not None:\n            #     center, wlh, yaw = ego_info\n            #     center[:2]=np.array(self.world2bev_vis(center[0],center[1]))\n            #     quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=yaw)\n            #     wlh[0]=wlh[0]*5\n            #     wlh[1]=wlh[1]*5\n            #     nusc_box = CustomBox(center, wlh, quat, velocity=[0, 0, 0])\n            #     c = colors[-1]\n            #     nusc_box.render(axes, view=np.eye(4), colors=(c, c, c), linewidth=1)\n        points_per_step=5\n        if results[0].get('pred_ego_traj') is not None:\n            pred_ego_fut_trajs = results[0]['pred_ego_traj']['pred_ego_fut_trajs']\n            # pred_ego_fut_trajs = kwargs['gt_ego_fut_trajs'][0].cpu()\n            pred_ego_fut_trajs = pred_ego_fut_trajs.numpy()\n            points = np.array([self.world2bev_vis(*point) for point in pred_ego_fut_trajs])          \n            points = np.insert(points, 0, np.array(ego_center), axis=0)\n           \n            points, colors = self._render_traj_v2(points, colormap='autumn')\n            x_coords, y_coords = zip(*points)\n            
for j in range(len(points) - 1):                 \n                axes.plot([x_coords[j], x_coords[j + 1]], [y_coords[j], y_coords[j + 1]], '-',c=colors[j], linewidth=1.5, zorder=2)  \n                if j != 0 and j % points_per_step==0:\n                    axes.scatter(x_coords[j], y_coords[j], s=5, marker='o',color=colors[j], zorder=3)\n            axes.scatter(x_coords[-1], y_coords[-1], s=5, marker='o',color=colors[-1], zorder=3)\n\n            if front_img is not None:\n\n                pred_ego_fut_trajs = results[0]['pred_ego_traj']['pred_ego_fut_trajs']\n                # pred_ego_fut_trajs = kwargs['gt_ego_fut_trajs'][0].cpu()\n                pred_ego_fut_trajs = torch.cat([torch.tensor([[4, 0]]), pred_ego_fut_trajs], 0)\n                pred_ego_fut_trajs = torch.cat([pred_ego_fut_trajs, torch.zeros_like(pred_ego_fut_trajs[:, :1])], -1)\n                traj_on_img = self.point_sampling(pred_ego_fut_trajs[None, None].to(cam_params[0].device), cam_params)[1][1, 0, 0].cpu().numpy()\n                front_img = front_img.permute(1, 2, 0)[:, :, [2, 1, 0]].cpu().numpy()\n                front_img = np.ascontiguousarray(front_img, dtype=np.uint8)\n               \n                traj_on_img, colors = self._render_traj_v2(traj_on_img, colormap='autumn')\n                traj_on_img = np.ascontiguousarray(traj_on_img, dtype=np.int32)\n\n                for i in range(len(traj_on_img)-1):\n                    front_img = cv2.line(front_img, traj_on_img[i], traj_on_img[i+1] , color=colors[i] * 255, thickness=5)\n                \n                avg_l2 = 0\n                for i in range(1,4):\n                    avg_l2 += metric_dict[f'plan_L2_{i}s']\n                avg_l2/=3\n\n                avg_coli = 0\n                for i in range(1,4):\n                    avg_coli += metric_dict[f'plan_obj_box_col_{i}s']\n                avg_coli = (avg_coli/3)>0\n                \n                avg_intersect = 0\n                for i in range(1,4):\n                    avg_intersect += metric_dict[f'plan_obj_box_col_plus_{i}s']\n                avg_intersect = (avg_intersect/3)>0\n\n                # org \n                org = (50, 50) \n                # fontScale \n                fontScale = 1.5\n                # Blue color in BGR \n                color = (10, 10, 254) \n                # Line thickness of 2 px \n                thickness = 2\n                # Using cv2.putText() method \n                # front_img = cv2.rectangle(front_img, (0, 0), (300, 150), (255, 255, 255), -1)\n                front_img = cv2.putText(front_img, 'Avg.L2: %.2f'%avg_l2, (10, 40), cv2.FONT_HERSHEY_SIMPLEX , fontScale, color, thickness, cv2.LINE_AA) \n                # front_img = cv2.putText(front_img, f'Collision: NaN', (10, 90), cv2.FONT_HERSHEY_SIMPLEX , fontScale, color, thickness, cv2.LINE_AA) \n                # front_img = cv2.putText(front_img, f'Intersection: NaN', (10, 140), cv2.FONT_HERSHEY_SIMPLEX , fontScale, color, thickness, cv2.LINE_AA) \n                front_img = cv2.putText(front_img, f'Collision: {str(avg_coli)}', (10, 90), cv2.FONT_HERSHEY_SIMPLEX , fontScale, color, thickness, cv2.LINE_AA) \n                front_img = cv2.putText(front_img, f'Intersection: {str(avg_intersect)}', (10, 140), cv2.FONT_HERSHEY_SIMPLEX , fontScale, color, thickness, cv2.LINE_AA) \n                # mmcv.imwrite(front_img, '')\n\n        if kwargs.get('map_gt_bboxes_3d', False):\n            map_gt_bboxes_3d = kwargs['map_gt_bboxes_3d'][0][0]\n            map_gt_labels_3d = 
kwargs['map_gt_labels_3d'][0][0]\n            for i, instance in enumerate(map_gt_bboxes_3d.instance_list):\n                # if map_gt_labels_3d[i]!=2: continue\n                line = np.array(list(instance.coords))\n                corners = np.array([self.world2bev_vis(*corner) for corner in line])\n                corners = [each for each in corners if ((each>=0).all() & (each<512).all())]\n                if len(corners)<1: continue\n                x_coords, y_coords = zip(*corners)\n                for k, corner in enumerate(corners[:-1]):\n                    axes.plot([x_coords[k], x_coords[k + 1]], [y_coords[k], y_coords[k + 1]], c='dimgray', linewidth=1, zorder=1,) \n        \n        if kwargs.get('gt_agent_fut_traj', False):\n            gt_agent_fut_traj = kwargs['gt_agent_fut_traj'][0][0].cpu()\n            gt_agent_fut_traj_mask = kwargs['gt_agent_fut_traj_mask'][0][0].cpu()\n            centers = kwargs['gt_bboxes_3d'][0][0].center[..., :2].cpu()\n            tmp = torch.cat([centers[:, None], gt_agent_fut_traj], 1)\n            trajs = torch.cumsum(tmp, 1)\n            for k, traj in enumerate(trajs):               \n                traj = traj.cpu().numpy()\n                # center = np.array(self.world2bev_vis(*centers[k]))\n                agent_fut_traj = np.array([self.world2bev_vis(*corner) for corner in traj])\n                corners, colors = self._render_traj_v2(agent_fut_traj, colormap='winter',points_per_step=points_per_step)\n                corners = [each for each in corners if ((each>=0).all() & (each<1536).all())]\n                x_coords, y_coords = zip(*corners)\n                for j in range(len(corners) - 1):\n                    # plot line between box center and the first traj point\n                    if j//points_per_step == 0 and gt_agent_fut_traj_mask[k, j//points_per_step].sum()==2:\n                        axes.plot([x_coords[j], x_coords[j + 1]], [y_coords[j], y_coords[j + 1]], '-',c=colors[j], linewidth=0.8, zorder=2)  \n                        continue  \n                    elif gt_agent_fut_traj_mask[k, j//points_per_step].sum()<2 or gt_agent_fut_traj_mask[k, j//points_per_step-1].sum()<2:\n                        continue                                 \n                    axes.plot([x_coords[j], x_coords[j + 1]], [y_coords[j], y_coords[j + 1]], '-',c=colors[j], linewidth=0.8, zorder=2)  \n\n\n        plt.margins(0, 0)\n        # plt.savefig(f'pred_bev_{results[0][\"index\"]}.png')\n        canvas = FigureCanvasAgg(plt.gcf())\n        canvas.draw()\n        w, h = canvas.get_width_height()\n        buf = np.fromstring(canvas.tostring_argb(), dtype=np.uint8)\n        buf.shape = (w, h, 4)\n        buf = np.roll(buf, 3, axis=2)\n        image = Image.frombytes(\"RGBA\", (w, h), buf.tostring())\n        image = np.asarray(image)\n        rgb_image = image[:, :, :3]\n        plt.close()\n        return rgb_image, front_img\n\n    def _render_traj(self, future_traj, traj_score=1, colormap='winter', points_per_step=5, line_color=None, dot_color=None, dot_size=25):\n        total_steps = (len(future_traj)-1) * points_per_step + 1\n        dot_colors = matplotlib.colormaps[colormap](\n            np.linspace(0, 1, total_steps))[:, :3] * 255\n        dot_colors = dot_colors*traj_score + \\\n            (1-traj_score)*np.ones_like(dot_colors)\n        total_xy = np.zeros((total_steps, 2))\n        for i in range(total_steps-1):\n            unit_vec = future_traj[i//points_per_step +\n                                   1] - 
future_traj[i//points_per_step]\n            total_xy[i] = (i/points_per_step - i//points_per_step) * \\\n                unit_vec + future_traj[i//points_per_step]\n        total_xy[-1] = future_traj[-1]\n        return total_xy, dot_colors\n\n    def _render_traj_v2(self, future_traj, traj_score=1, colormap='winter', points_per_step=5, line_color=None, dot_color=None, dot_size=25):\n        total_steps = (len(future_traj)-1) * points_per_step + 1\n        dot_colors = matplotlib.colormaps[colormap](\n            np.linspace(0, 1, total_steps))[:, :3]\n        # dot_colors = dot_colors*traj_score + \\\n        #     (1-traj_score)*np.ones_like(dot_colors)\n        total_xy = np.zeros((total_steps, 2))\n        for i in range(total_steps-1):\n            unit_vec = future_traj[i//points_per_step +\n                                   1] - future_traj[i//points_per_step]\n            total_xy[i] = (i/points_per_step - i//points_per_step) * \\\n                unit_vec + future_traj[i//points_per_step]\n        total_xy[-1] = future_traj[-1]\n        return total_xy, dot_colors"
  },
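  {
    "path": "docs/examples/constant_velocity_plan_sketch.py",
    "content": "# Illustrative sketch, NOT part of the original repo (hypothetical example file).\n# It reproduces how AD_mlp.py builds the constant-velocity trajectory\n# fut_traj_from_velo from the ego velocity stored in gt_ego_lcf_feat[:, :2]:\n# the velocity is repeated for every future step (6 steps, 0.5 s apart) and the\n# per-step displacements are accumulated with a cumulative sum.\nimport torch\n\nego_fut_steps = 6\ngt_ego_lcf_feat = torch.tensor([[4.0, 0.1]])  # (B, 2): made-up ego vx, vy in m/s\n\nvel = gt_ego_lcf_feat[:, :2].unsqueeze(1).repeat(1, ego_fut_steps, 1)  # (B, 6, 2)\nfut_traj_from_velo = torch.cumsum(vel * 0.5, dim=1)  # positions after 0.5 s, 1.0 s, ..., 3.0 s\n\nprint(fut_traj_from_velo[0])  # each row advances by vel * 0.5 under the constant-velocity assumption\n"
  },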
  {
    "path": "mmdet3d/models/fbbev/planner_head/__init__.py",
    "content": "from .plan_loss import *\nfrom .plan_loss_gt import *\nfrom .naive_planner import NaivePlannerHead\nfrom .AD_mlp import AD_MLP"
  },
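  {
    "path": "docs/examples/planning_l2_horizons_sketch.py",
    "content": "# Illustrative sketch, NOT part of the original repo (hypothetical example file).\n# It shows how the plan_L2_{1,2,3}s numbers reported by get_traj()/get_bboxes() map\n# onto the 2 Hz trajectories: the i-th horizon uses the first cur_time = (i + 1) * 2\n# waypoints (0.5 s spacing), and the L2 metric is the mean per-waypoint distance,\n# as in PlanningMetric.compute_L2 below.\nimport torch\n\npred = torch.tensor([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0], [4.0, 0.0], [5.0, 0.0], [6.0, 0.0]])\ngt = torch.tensor([[1.0, 0.5], [2.0, 0.5], [3.0, 0.5], [4.0, 0.5], [5.0, 0.5], [6.0, 0.5]])\n\nfor i in range(3):                 # 1 s, 2 s and 3 s horizons\n    cur_time = (i + 1) * 2         # number of 0.5 s waypoints inside the horizon\n    l2 = (pred[:cur_time] - gt[:cur_time]).norm(dim=-1).mean().item()\n    print(f'plan_L2_{i + 1}s = {l2:.3f}')  # constant 0.5 m offset -> 0.500 for every horizon\n"
  },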
  {
    "path": "mmdet3d/models/fbbev/planner_head/metric_stp3.py",
    "content": "'''\ncalculate planner metric same as stp3\n'''\nimport numpy as np\nimport torch\nimport cv2\nimport copy\nimport matplotlib.pyplot as plt\nfrom skimage.draw import polygon\nfrom nuscenes.utils.data_classes import Box\nfrom scipy.spatial.transform import Rotation as R\n\nego_width, ego_length = 1.85, 4.084\n\nclass PlanningMetric():\n    def __init__(self):\n        super().__init__()\n        self.X_BOUND = [-50.0, 50.0, 0.1]  # Forward\n        self.Y_BOUND = [-50.0, 50.0, 0.1]  # Sides\n        self.Z_BOUND = [-10.0, 10.0, 20.0]  # Height\n        dx, bx, _ = self.gen_dx_bx(self.X_BOUND, self.Y_BOUND, self.Z_BOUND)\n        self.dx, self.bx = dx[:2], bx[:2]\n\n        bev_resolution, bev_start_position, bev_dimension = self.calculate_birds_eye_view_parameters(\n            self.X_BOUND, self.Y_BOUND, self.Z_BOUND\n        )\n        self.bev_resolution = bev_resolution.numpy()\n        self.bev_start_position = bev_start_position.numpy()\n        self.bev_dimension = bev_dimension.numpy()\n\n        self.W = ego_width\n        self.H = ego_length\n\n        self.category_index = {\n            'human':[2,3,4,5,6,7,8],\n            'vehicle':[14,15,16,17,18,19,20,21,22,23]\n        }\n        \n    def gen_dx_bx(self, xbound, ybound, zbound):\n        dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]])\n        bx = torch.Tensor([row[0] + row[2]/2.0 for row in [xbound, ybound, zbound]])\n        nx = torch.LongTensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, zbound]])\n\n        return dx, bx, nx\n    \n    def calculate_birds_eye_view_parameters(self, x_bounds, y_bounds, z_bounds):\n        \"\"\"\n        Parameters\n        ----------\n            x_bounds: Forward direction in the ego-car.\n            y_bounds: Sides\n            z_bounds: Height\n\n        Returns\n        -------\n            bev_resolution: Bird's-eye view bev_resolution\n            bev_start_position Bird's-eye view first element\n            bev_dimension Bird's-eye view tensor spatial dimension\n        \"\"\"\n        bev_resolution = torch.tensor([row[2] for row in [x_bounds, y_bounds, z_bounds]])\n        bev_start_position = torch.tensor([row[0] + row[2] / 2.0 for row in [x_bounds, y_bounds, z_bounds]])\n        bev_dimension = torch.tensor([(row[1] - row[0]) / row[2] for row in [x_bounds, y_bounds, z_bounds]],\n                                    dtype=torch.long)\n\n        return bev_resolution, bev_start_position, bev_dimension\n\n\n\n    def evaluate_single_coll(self, traj, segmentation, input_gt, gt_traj=None, index=None):\n        '''\n        traj: torch.Tensor (n_future, 2)\n            自车IMU系为轨迹参考系\n\n                0------->\n                |        x\n                |\n                |y\n                \n        segmentation: torch.Tensor (n_future, 200, 200)\n        '''\n        # 0.985793 is the distance betweem the LiDAR and the IMU(ego).\n\n        import mmcv\n        pts = np.array([\n            [-self.H / 2. + 0.5 + 0.985793, self.W / 2.],\n            [self.H / 2. + 0.5 + 0.985793, self.W / 2.],\n            [self.H / 2. + 0.5 + 0.985793, -self.W / 2.],\n            [-self.H / 2. 
+ 0.5 + 0.985793, -self.W / 2.],\n        ])\n        pts = (pts - self.bx.cpu().numpy() ) / (self.dx.cpu().numpy())\n        pts[:, [0, 1]] = pts[:, [1, 0]]\n        rr, cc = polygon(pts[:,1], pts[:,0])\n        rc = np.concatenate([rr[:,None], cc[:,None]], axis=-1)\n        rc_ori = rc + (self.bx.cpu().numpy() / self.dx.cpu().numpy())\n\n\n        traj_with_ego = torch.cat([traj.new_zeros(1, 2), traj], 0)\n        rc_yaw = []\n        rotate_angle = 0\n        for i in range(traj.size(0)):\n            delta = traj_with_ego[i+1] - traj_with_ego[i]\n            cur_rotate_angle = torch.atan2(*delta[[1, 0]])\n            if delta.norm()<1: cur_rotate_angle = 0\n            rotate_angle = cur_rotate_angle\n            rotate_angle = -torch.tensor(rotate_angle)\n            rot_sin = torch.sin(rotate_angle)\n            rot_cos = torch.cos(rotate_angle)\n            rot_mat = torch.Tensor([[rot_cos, -rot_sin], [rot_sin, rot_cos]])\n            tmp = rc_ori @ rot_mat.cpu().numpy() -  (self.bx.cpu().numpy() / self.dx.cpu().numpy())\n            tmp = tmp.round().astype(int)\n            rc_yaw.append(tmp)\n           \n        rc_yaw = np.stack(rc_yaw)\n\n    \n        # n_future, _ = traj.shape\n        # trajs = traj.view(n_future, 1, 2)\n\n        # trajs_ = copy.deepcopy(trajs)\n        # trajs_ = trajs_ / self.dx.to(trajs.device)\n        # trajs_ = trajs_.cpu().numpy() + rc # (n_future, 32, 2)\n\n        # r = trajs_[:,:,0].astype(np.int32)\n        # r = np.clip(r, 0, self.bev_dimension[0] - 1)\n\n        # c = trajs_[:,:,1].astype(np.int32)\n        # c = np.clip(c, 0, self.bev_dimension[1] - 1)\n\n        # collision = np.full(n_future, False)\n        # for t in range(n_future):\n        #     rr = r[t]\n        #     cc = c[t]\n        #     I = np.logical_and(\n        #         np.logical_and(rr >= 0, rr < self.bev_dimension[0]),\n        #         np.logical_and(cc >= 0, cc < self.bev_dimension[1]),\n        #     )\n        #     collision[t] = np.any(segmentation[t,  cc[I], rr[I]].cpu().numpy())\n\n        n_future, _ = traj.shape\n        trajs = traj.view(n_future, 1, 2)\n\n        trajs_ = copy.deepcopy(trajs)\n        trajs_ = trajs_ / self.dx.to(trajs.device)\n        trajs_ = trajs_.cpu().numpy() + rc_yaw # (n_future, 32, 2)\n\n        r = trajs_[:,:,0].astype(np.int32)\n        r = np.clip(r, 0, self.bev_dimension[0] - 1)\n\n        c = trajs_[:,:,1].astype(np.int32)\n        c = np.clip(c, 0, self.bev_dimension[1] - 1)\n\n        collision2 = np.full(n_future, False)\n        # obs_occ = copy.deepcopy(segmentation).cpu().numpy() * 0\n        for t in range(n_future):\n            rr = r[t]\n            cc = c[t]\n            I = np.logical_and(\n                np.logical_and(rr >= 0, rr < self.bev_dimension[0]),\n                np.logical_and(cc >= 0, cc < self.bev_dimension[1]),\n            )\n           \n            collision2[t] = np.any(segmentation[t,  cc[I], rr[I]].cpu().numpy())\n        return torch.from_numpy(collision2).to(device=traj.device)\n\n    def evaluate_coll(\n            self, \n            trajs, \n            gt_trajs, \n            segmentation,\n            index=None,\n            ignore_gt=True,\n        ):\n        '''\n        trajs: torch.Tensor (B, n_future, 2)\n        The ego (IMU) frame is the reference frame of the trajectories:\n\n                0------->\n                |        x\n                |\n                |y\n        gt_trajs: torch.Tensor (B, n_future, 2)\n        segmentation: torch.Tensor (B, n_future, 200, 200)\n\n        '''\n        B, n_future, _ = 
trajs.shape\n        # trajs = trajs * torch.tensor([-1, 1], device=trajs.device)\n        # gt_trajs = gt_trajs * torch.tensor([-1, 1], device=gt_trajs.device)\n\n        obj_coll_sum = torch.zeros(n_future, device=segmentation.device)\n        obj_box_coll_sum = torch.zeros(n_future, device=segmentation.device)\n\n        for i in range(B):\n            gt_box_coll = self.evaluate_single_coll(gt_trajs[i], segmentation[i], input_gt=True)\n\n            xx, yy = trajs[i,:,0], trajs[i, :, 1]\n\n            xi = ((-self.bx[0] + xx) / self.dx[0]).long()\n            yi = ((-self.bx[1] + yy) / self.dx[1]).long()\n\n            m1 = torch.logical_and(\n                torch.logical_and(xi >= 0, xi < self.bev_dimension[0]),\n                torch.logical_and(yi >= 0, yi < self.bev_dimension[1]),\n            ).to(gt_box_coll.device)\n            m1 = torch.logical_and(m1, torch.logical_not(gt_box_coll))\n\n            ti = torch.arange(n_future).to(segmentation.device)\n            # segmentation: B, T, H, W\n            obj_coll_sum[ti[m1]] += segmentation[i, ti[m1], yi[m1], xi[m1]].long()\n\n            m2 = torch.logical_not(gt_box_coll)\n            box_coll = self.evaluate_single_coll(trajs[i],\n                    segmentation[i],\n                    gt_traj=gt_trajs[i],\n                    input_gt=False,\n                    index=index[i],\n                    ).to(segmentation.device)\n            if ignore_gt:\n                obj_box_coll_sum += (gt_box_coll).long()                \n            else:\n                obj_box_coll_sum[ti[m2]] += (box_coll[ti[m2]]).long()\n        return obj_coll_sum, obj_box_coll_sum\n\n    def compute_L2(self, trajs, gt_trajs):\n        '''\n        trajs: torch.Tensor (n_future, 2)\n        gt_trajs: torch.Tensor (n_future, 2)\n        '''\n        # return torch.sqrt(((trajs[:, :, :2] - gt_trajs[:, :, :2]) ** 2).sum(dim=-1))\n        pred_len = trajs.shape[0]\n        ade = float(\n            sum(\n                torch.sqrt(\n                    (trajs[i, 0] - gt_trajs[i, 0]) ** 2\n                    + (trajs[i, 1] - gt_trajs[i, 1]) ** 2\n                )\n                for i in range(pred_len)\n            )\n            / pred_len\n        )\n        \n        return ade\n"
  },
  {
    "path": "mmdet3d/models/fbbev/planner_head/naive_planner.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\nimport torch\nimport torch.nn as nn \nfrom mmcv.cnn import Linear, bias_init_with_prob, Scale\n\nfrom mmcv.runner import force_fp32\nfrom mmdet.core import (build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\nfrom ..streampetr.streampetr_utils import *\nimport copy\nfrom mmdet.models.utils import NormedLinear\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.fbbev.utils import save_tensor\nfrom mmcv.runner.base_module import BaseModule\nfrom mmcv.cnn.bricks.transformer import build_transformer_layer_sequence\nfrom .metric_stp3 import PlanningMetric\n\n\ndef get_ego_pos(points, pc_range):\n    if points.size(-1) == 3:\n        points = points * (pc_range[3:6] - pc_range[0:3]) + pc_range[0:3]\n    elif  points.size(-1) == 2:\n        points = points * (pc_range[3:5] - pc_range[0:2]) + pc_range[0:2]\n    return points\n\ndef get_rel_pos(points, pc_range):\n    if points.size(-1) == 3:\n        return (points - pc_range[0:3]) / (pc_range[3:6] - pc_range[0:3])\n    elif  points.size(-1) == 2:\n        return (points - pc_range[0:2]) / (pc_range[3:5] - pc_range[0:2])\n\n\n@HEADS.register_module()\nclass NaivePlannerHead(BaseModule):\n  \n    _version = 2\n\n    def __init__(self,\n                 # num_classes=1,\n                 in_channels=256,\n                 stride=[16],\n                 embed_dims=256,\n                 num_query=1,\n                 num_reg_fcs=2,\n                 memory_len=12,\n                 topk_proposals=4,\n                 num_propagated=0,\n                 with_dn=True,\n                 with_ego_pos=True,\n                 match_with_velo=True,\n                 match_costs=None,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 code_weights=None,\n                 bbox_coder=None,\n                 init_cfg=None,\n                 normedlinear=False,\n                 point_cloud_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],\n                 loss_plan_reg=dict(type='L1Loss', loss_weight=5.0),\n                 loss_plan_col=dict(type='PlanCollisionLoss', loss_weight=5.0),\n                 with_ego_status=False,\n                 dist_func_type='MDE',\n                 use_map_info=False,\n                **kwargs):\n\n        if 'code_size' in kwargs:\n            self.code_size = kwargs['code_size']\n        else:\n            self.code_size = 2\n        self.use_map_info = use_map_info\n\n        self.with_ego_status = with_ego_status\n        self.num_query = num_query\n        self.in_channels = in_channels\n        self.num_reg_fcs = num_reg_fcs\n        # self.train_cfg = train_cfg\n        # self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.embed_dims = embed_dims\n        self.num_motion_mode = 6\n        self.fut_steps = 6\n        self.memory_len = 6\n        self.ego_fut_mode = 3\n\n\n        super(NaivePlannerHead, self).__init__()\n       \n        self.pc_range = 
nn.Parameter(torch.tensor(\n            point_cloud_range), requires_grad=False)\n\n        self.loss_plan_reg = build_loss(loss_plan_reg)\n        loss_plan_col.update(point_cloud_range=point_cloud_range)\n        self.loss_plan_col = build_loss(loss_plan_col)\n\n\n        ego_img_decoder = dict(\n                    type='CustomTransformerDecoder',\n                    num_layers=1,\n                    return_intermediate=False,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        batch_first=True,\n                        attn_cfgs=dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        feedforward_channels=1024,\n                        ffn_dropout=0.1,\n                        operation_order=('cross_attn', 'norm', 'ffn', 'norm')))\n        if self.use_map_info:\n            ego_agent_decoder = dict(\n                    type='CustomTransformerDecoder',\n                    num_layers=1,\n                    return_intermediate=False,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        batch_first=True,\n                        attn_cfgs=dict(\n                            type='MotionSelfAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1,\n                            dist_func_type=dist_func_type,\n                            pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],\n                            consider_map_quality=True,\n                        ),\n                    feedforward_channels=2048,\n                    ffn_dropout=0.1,\n                    operation_order=('cross_attn', 'norm', 'ffn', 'norm')))\n\n            self.ego_agent_decoder = build_transformer_layer_sequence(ego_agent_decoder)\n            self.gamma = nn.Parameter(torch.ones(256)*0.5, requires_grad=True)\n        self.ego_img_decoder = build_transformer_layer_sequence(ego_img_decoder)\n        # self.ego_decoder = build_transformer_layer_sequence(ego_agent_decoder)\n\n        self.ego_info = MLN(3)\n        self._init_layers()\n        self.reset_memory()\n        self.planning_metric = PlanningMetric()\n\n        self.count = 0\n    \n    def reset_memory(self):\n        self.memory_traj = None\n        # self.memory_ego_embed = None\n\n    def pre_update_memory(self, data, fut_traj_from_velo):\n\n        x = 1-data['start_of_sequence'] # original prev_exist, so we need do `not`\n        B = x.size(0)\n        # refresh the memory when the scene changes\n        if self.memory_traj is None:\n            self.memory_traj =  fut_traj_from_velo.unsqueeze(1).repeat(1, self.memory_len, 1, 1) * 0\n            # self.memory_ego_embed = x.new_zeros(B, self.memory_len, self.embed_dims * 2)\n        else:\n            self.memory_traj = transform_reference_points(self.memory_traj, data['ego_pose_inv'], reverse=False)[..., :2]\n            self.memory_traj = memory_refresh(self.memory_traj[:, :self.memory_len], x) \n            for i in range(B):\n                if not x[i]: self.memory_traj[i, 0] = fut_traj_from_velo[i] * 0\n            \n            # self.memory_ego_embed = memory_refresh(self.memory_ego_embed[:, :self.memory_len], x)\n\n    def 
post_update_memory(self, data, ego_fut_trajs, ego_embeds):\n        self.memory_traj = torch.cat([ego_fut_trajs, self.memory_traj], dim=1)\n        self.memory_traj = torch.cat([self.memory_traj, torch.zeros_like(self.memory_traj[..., :1])], -1)\n        self.memory_traj = transform_reference_points(self.memory_traj, data['ego_pose'], reverse=False)\n        # self.memory_ego_embed = torch.cat([ego_embeds, self.memory_ego_embed], dim=1)\n    \n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n\n        ego_fut_decoder = []\n        ego_fut_dec_in_dim = self.embed_dims\n        if self.with_ego_status:\n            ego_fut_dec_in_dim += 9\n        \n        for i in range(self.num_reg_fcs):\n            if i ==0: \n                ego_fut_decoder.append(Linear(ego_fut_dec_in_dim, self.embed_dims))\n            else:\n                ego_fut_decoder.append(Linear(self.embed_dims, self.embed_dims))\n            ego_fut_decoder.append(nn.ReLU())\n        ego_fut_decoder.append(Linear(self.embed_dims, self.ego_fut_mode*self.fut_steps*2))\n        self.ego_fut_decoder = nn.Sequential(*ego_fut_decoder)\n\n        self.query_feat_embedding = nn.Embedding(self.num_query, self.embed_dims)\n\n    def calc_MDE(self, reference_points_q, reference_points_v, pc_range, map_scores=None):\n        \"\"\"\n        mim mean distance between the map lane and traj.\n        \"\"\"\n\n        reference_points_q = reference_points_q[..., :2]\n        q_shape = reference_points_q.shape\n        v_shape = reference_points_v.shape\n        reference_points_q = reference_points_q.flatten(1, 2)\n        reference_points_v = reference_points_v.flatten(1, 2)\n        \n        dist = []\n        code_size = reference_points_q.size(-1)\n        for b in range(reference_points_q.shape[0]):\n            dist_b = torch.norm(reference_points_q[b].reshape(-1, 1, code_size) - reference_points_v[b].reshape(1, -1, code_size), dim=-1)\n            dist.append(dist_b[None, ...])\n        dist = torch.cat(dist, dim=0)  # [B, Q, K]\n        dist = dist.view(q_shape[0], q_shape[1], q_shape[2], v_shape[1], v_shape[2])\n        dist = dist.min(-1).values.mean(2)\n        \n        if map_scores is not None:\n            map_scores = map_scores.sigmoid().max(-1)[0] # smaller, better\n            map_scores = torch.round(1-map_scores, decimals=1) + self.map_alpha\n            dist = dist * map_scores.unsqueeze(1)\n            \n        dist = -dist\n\n        return dist\n\n    def forward(self, results, gt_ego_lcf_feat, gt_ego_fut_cmd, gt_ego_his_traj=None, gt_ego_fut_trajs=None, img_metas=None, map_results=None):\n        \n        # agent_queries = map_results['queries']\n        if self.use_map_info:\n            map_queries = map_results['queries'].clone()\n            map_lines = map_results['lines'].clone()\n            map_scores = map_results['scores'].clone()\n            B, NMQ, K2 = map_lines.shape\n            map_lines = map_lines.reshape(B, NMQ, K2//2, 2)\n            # map_pos = self.query_embedding(bevpos2posemb(map_lines.mean(-2)))\n            map_lines = get_ego_pos(map_lines, self.pc_range)\n\n        img_context = results['img_bev_feat'][0].flatten(-2, -1).permute(0, 2, 1)\n        \n        gt_ego_lcf_feat = torch.stack(gt_ego_lcf_feat).to(img_context.device)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd).to(img_context.device)\n\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in 
img_metas]).to(img_context.device)\n\n        timestamp = torch.FloatTensor([\n            single_img_metas['timestamp'] \n            for single_img_metas in img_metas]).to(img_context.device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(img_context.device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(img_context.device)\n\n        data = dict(\n            start_of_sequence = start_of_sequence,\n            timestamp = timestamp,\n            ego_pose_inv = ego_pose_inv,\n            ego_pose = ego_pose,\n        )\n\n        fut_traj_from_velo = gt_ego_lcf_feat[:, :2].unsqueeze(1).repeat(1, self.fut_steps, 1) * torch.arange(1, self.fut_steps+1)[None,:, None].to(img_context.device) * 0.5\n\n        self.pre_update_memory(data, fut_traj_from_velo)\n        bs = img_context.size(0)\n        ego_query = self.query_feat_embedding.weight.repeat(bs, 1)\n        ego_query = self.ego_info(ego_query, gt_ego_fut_cmd.to(ego_query.dtype)).unsqueeze(1)\n\n        init_ego_traj =  self.memory_traj[:, 0:1]\n\n        if self.use_map_info:\n            ego_query = (1-self.gamma) * self.ego_agent_decoder(\n                query = ego_query,\n                key = map_queries,\n                val = map_queries,\n                reference_points_q=init_ego_traj,\n                reference_points_v=map_lines,\n                pc_range=self.pc_range,\n                map_scores=map_scores\n                )   + self.gamma * self.ego_img_decoder(\n                query = ego_query,\n                key = img_context,\n                val = img_context,\n                # query_pos = ego_pose\n                )\n        else:\n            ego_query =self.ego_img_decoder(\n                query = ego_query,\n                key = img_context,\n                val = img_context,\n                )\n        if self.with_ego_status:\n            ego_query = torch.cat([ego_query[:, 0], gt_ego_lcf_feat], -1)\n        outputs_ego_trajs = self.ego_fut_decoder(ego_query)\n        outputs_ego_trajs = outputs_ego_trajs.reshape(outputs_ego_trajs.shape[0], \n                                                      self.ego_fut_mode, self.fut_steps, 2)\n\n\n        self.post_update_memory(data, torch.cumsum(outputs_ego_trajs[gt_ego_fut_cmd==1], 1)[:, None], ego_query)\n        \n\n        ego_trajs = torch.cumsum(outputs_ego_trajs[gt_ego_fut_cmd==1], 1)\n        ego_trajs = torch.cat([torch.zeros_like(ego_trajs[:,:1]), ego_trajs], 1)\n        ego_trajs = torch.cat([ego_trajs, torch.zeros_like(ego_trajs[..., :1])], -1)\n        ego_trajs_in_global = transform_reference_points(ego_trajs, data['ego_pose'], reverse=False)[..., :2]\n\n        return dict(\n            ego_fut_preds=outputs_ego_trajs,\n            ego_trajs_in_global = ego_trajs_in_global,\n            data=data\n        )\n\n    @force_fp32(apply_to=('preds_plan_dicts'))\n    def loss(self,\n             gt_ego_fut_trajs=None,\n             gt_ego_fut_cmd=None,\n             gt_ego_fut_masks=None,\n             preds_plan_dicts=None,\n             img_metas=None,\n            ):\n        \n\n        ego_fut_preds = preds_plan_dicts['ego_fut_preds']\n        gt_ego_fut_trajs = torch.stack(gt_ego_fut_trajs)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd)\n        gt_ego_fut_masks = torch.stack(gt_ego_fut_masks)\n        gt_ego_fut_trajs = torch.cat([gt_ego_fut_trajs[:,:1], 
(gt_ego_fut_trajs[:,1:] - gt_ego_fut_trajs[:,:-1])], 1)\n        gt_ego_fut_trajs = gt_ego_fut_trajs.unsqueeze(1).repeat(1, self.ego_fut_mode, 1, 1)\n\n        loss_plan_l1_weight = gt_ego_fut_cmd[..., None, None] * gt_ego_fut_masks[:, None, :, None]\n        loss_plan_l1_weight = loss_plan_l1_weight.repeat(1, 1, 1, 2)\n        \n        loss_plan_l1 = self.loss_plan_reg(\n            ego_fut_preds,\n            gt_ego_fut_trajs,\n            loss_plan_l1_weight\n        )\n\n        loss_plan_l1 = torch.nan_to_num(loss_plan_l1)\n      \n        loss_plan_dict = dict()\n        loss_plan_dict['loss_plan_reg'] = loss_plan_l1\n\n        return loss_plan_dict\n\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_bboxes(self, preds_dicts, img_metas,  rescale=False, gt_ego_fut_trajs=None, \n        gt_ego_fut_cmd=None, gt_ego_fut_masks=None, gt_fut_segmentations=None, gt_fut_segmentations_plus=None,\n        vad_ego_fut_trajs=None, **kwargs,\n        ):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        pred_ego_fut_trajs = preds_dicts['ego_fut_preds']\n\n        gt_ego_fut_trajs = torch.stack(gt_ego_fut_trajs).to(pred_ego_fut_trajs.device)\n        gt_ego_fut_cmd = torch.stack(gt_ego_fut_cmd).to(pred_ego_fut_trajs.device)\n        gt_ego_fut_masks = torch.stack(gt_ego_fut_masks).to(pred_ego_fut_trajs.device)\n\n        pred_ego_fut_trajs = torch.cumsum(pred_ego_fut_trajs[gt_ego_fut_cmd==1], 1)\n        # pred_ego_fut_trajs = vad_ego_fut_trajs[0][None]\n        ego_trajs = torch.cat([torch.zeros_like(pred_ego_fut_trajs[:,:1]), pred_ego_fut_trajs], 1)\n        ego_trajs = torch.cat([ego_trajs, torch.zeros_like(ego_trajs[..., :1])], -1)\n        ego_trajs_in_global = transform_reference_points(ego_trajs, preds_dicts['data']['ego_pose'], reverse=False)[..., :2]\n        metric_dict = {\n            'plan_L2_1s':0,\n            'plan_L2_2s':0,\n            'plan_L2_3s':0,\n            'plan_obj_col_1s':0,\n            'plan_obj_col_2s':0,\n            'plan_obj_col_3s':0,\n            'plan_obj_box_col_1s':0,\n            'plan_obj_box_col_2s':0,\n            'plan_obj_box_col_3s':0,\n            'plan_obj_col_plus_1s':0,\n            'plan_obj_col_plus_2s':0,\n            'plan_obj_col_plus_3s':0,\n            'plan_obj_box_col_plus_1s':0,\n            'plan_obj_box_col_plus_2s':0,\n            'plan_obj_box_col_plus_3s':0,\n            'l2_dist': 0,\n        }\n        \n\n        fut_valid_flag = gt_ego_fut_masks.all()\n        future_second = 3\n        metric_dict['fut_valid_flag'] = fut_valid_flag.cpu().item()\n        for i in range(future_second):\n            if fut_valid_flag:\n                cur_time = (i+1)*2\n                traj_L2 = self.planning_metric.compute_L2(\n                    pred_ego_fut_trajs[0, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[0, :cur_time]\n                )\n\n                obj_coll, obj_box_coll = self.planning_metric.evaluate_coll(\n                    pred_ego_fut_trajs[:, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[:, :cur_time],\n                    gt_fut_segmentations,\n                    index = [each['index'] for each in img_metas],\n                    ignore_gt=False,\n                
)\n                metric_dict['plan_L2_{}s'.format(i+1)] = traj_L2\n                metric_dict['plan_obj_col_{}s'.format(i+1)] = obj_coll.max().item()\n                metric_dict['plan_obj_box_col_{}s'.format(i+1)] = obj_box_coll.max().item()\n        \n        for i in range(future_second):\n            if fut_valid_flag:\n                cur_time = (i+1)*2\n                obj_coll, obj_box_coll = self.planning_metric.evaluate_coll(\n                    pred_ego_fut_trajs[:, :cur_time].detach().to(gt_ego_fut_trajs.device),\n                    gt_ego_fut_trajs[:, :cur_time],\n                    gt_fut_segmentations_plus,\n                    index = [each['index'] for each in img_metas],\n                    ignore_gt=False,\n                )\n                metric_dict['plan_obj_col_plus_{}s'.format(i+1)] = obj_coll.mean().item()\n                metric_dict['plan_obj_box_col_plus_{}s'.format(i+1)] = obj_box_coll.max().item()\n\n        l2_dist = (pred_ego_fut_trajs-gt_ego_fut_trajs).norm(dim=-1) * gt_ego_fut_masks[:, None]\n\n        l2_dist[gt_ego_fut_masks[:, None]==0] = -1\n        metric_dict['l2_dist'] = l2_dist[0].cpu()\n        ret_list = []\n        num_samples = len(pred_ego_fut_trajs)\n        assert num_samples == 1\n        \n        index_w_scene = img_metas[0]['scene_name'] + '-' + str(img_metas[0]['index'])\n\n        for i in range(num_samples):\n            ret_list.append(\n                dict(\n                    pred_ego_fut_trajs = pred_ego_fut_trajs[i].cpu(),\n                    gt_ego_fut_trajs = gt_ego_fut_trajs[i].cpu(),\n                    metric_dict = metric_dict,\n                    l2_dist=l2_dist[i].cpu(),\n                    index_w_scene = index_w_scene,\n                    ego_trajs_in_global = ego_trajs_in_global[i].cpu(),\n                    gt_ego_fut_cmd = gt_ego_fut_cmd[i].cpu(),\n                    index = img_metas[i]['index']\n                    )\n                )\n        return ret_list\n\n\nclass MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256, use_ln=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.use_ln = use_ln\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        if self.use_ln:\n            self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.init_weight()\n\n    def init_weight(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        if self.use_ln:\n            x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out"
  },
  {
    "path": "mmdet3d/models/fbbev/planner_head/plan_loss.py",
    "content": "import math\nimport mmcv\nimport torch\nfrom torch import nn as nn\nfrom mmdet.models import weighted_loss\nfrom mmdet.models.builder import LOSSES\n\n\n\n\n@LOSSES.register_module()\nclass PlanMapBoundLoss(nn.Module):\n    \"\"\"Planning constraint to push ego vehicle away from the lane boundary.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        map_thresh (float, optional): confidence threshold to filter map predictions.\n        lane_bound_cls_idx (float, optional): lane_boundary class index.\n        dis_thresh (float, optional): distance threshold between ego vehicle and lane bound.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        map_thresh=0.5,\n        lane_bound_cls_idx=2,\n        dis_thresh=1.0,\n        point_cloud_range=[-15.0, -30.0, -2.0, 15.0, 30.0, 2.0],\n        perception_detach=False\n    ):\n        super(PlanMapBoundLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.map_thresh = map_thresh\n        self.lane_bound_cls_idx = lane_bound_cls_idx\n        self.dis_thresh = dis_thresh\n        self.pc_range = point_cloud_range\n        self.perception_detach = perception_detach\n\n    def forward(self,\n                ego_fut_preds,\n                lane_preds,\n                lane_score_preds,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            lane_preds (Tensor): [B, num_vec, num_pts, 2]\n            lane_score_preds (Tensor): [B, num_vec, 3]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        if self.perception_detach:\n            lane_preds = lane_preds.detach()\n            lane_score_preds = lane_score_preds.detach()\n\n        # filter lane element according to confidence score and class\n        not_lane_bound_mask = lane_score_preds[..., self.lane_bound_cls_idx] < self.map_thresh\n        # denormalize map pts\n        lane_bound_preds = lane_preds.clone()\n        lane_bound_preds[...,0:1] = (lane_bound_preds[..., 0:1] * (self.pc_range[3] -\n                                self.pc_range[0]) + self.pc_range[0])\n        lane_bound_preds[...,1:2] = (lane_bound_preds[..., 1:2] * (self.pc_range[4] -\n                                self.pc_range[1]) + self.pc_range[1])\n        # pad not-lane-boundary cls and low confidence preds\n        lane_bound_preds[not_lane_bound_mask] = 1e6\n\n        loss_bbox = self.loss_weight * plan_map_bound_loss(ego_fut_preds, lane_bound_preds,\n                                                           weight=weight, dis_thresh=self.dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_map_bound_loss(pred, target, dis_thresh=1.0):\n    \"\"\"Planning map bound constraint (L1 distance).\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): lane_bound_preds, [B, num_vec, num_pts, 2].\n        weight (torch.Tensor): [B, fut_ts]\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_ts]\n    \"\"\"\n    pred = pred.cumsum(dim=-2)\n    ego_traj_starts = pred[:, :-1, :]\n    ego_traj_ends = pred\n    B, T, _ = ego_traj_ends.size()\n    padding_zeros = torch.zeros((B, 1, 2), dtype=pred.dtype, device=pred.device)  # initial position\n    ego_traj_starts = torch.cat((padding_zeros, ego_traj_starts), dim=1)\n    _, V, P, _ = target.size()\n    ego_traj_expanded = ego_traj_ends.unsqueeze(2).unsqueeze(3)  # [B, T, 1, 1, 2]\n    maps_expanded = target.unsqueeze(1)  # [1, 1, M, P, 2]\n    dist = torch.linalg.norm(ego_traj_expanded - maps_expanded, dim=-1)  # [B, T, M, P]\n    dist = dist.min(dim=-1, keepdim=False)[0]\n    min_inst_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    bd_target = target.unsqueeze(1).repeat(1, pred.shape[1], 1, 1, 1)\n    min_bd_insts = bd_target[batch_idxs, ts_idxs, min_inst_idxs]  # [B, T, P, 2]\n    bd_inst_starts = min_bd_insts[:, :, :-1, :].flatten(0, 2)\n    bd_inst_ends = min_bd_insts[:, :, 1:, :].flatten(0, 2)\n    ego_traj_starts = ego_traj_starts.unsqueeze(2).repeat(1, 1, P-1, 1).flatten(0, 2)\n    ego_traj_ends = ego_traj_ends.unsqueeze(2).repeat(1, 1, P-1, 1).flatten(0, 2)\n\n    intersect_mask = segments_intersect(ego_traj_starts, ego_traj_ends,\n                                        bd_inst_starts, bd_inst_ends)\n    intersect_mask = intersect_mask.reshape(B, T, P-1)\n    intersect_mask = intersect_mask.any(dim=-1)\n    intersect_idx = (intersect_mask == True).nonzero()\n\n    
target = target.view(target.shape[0], -1, target.shape[-1])\n    # [B, fut_ts, num_vec*num_pts]\n    dist = torch.linalg.norm(pred[:, :, None, :] - target[:, None, :, :], dim=-1)\n    min_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    min_dist = dist[batch_idxs, ts_idxs, min_idxs]\n    loss = min_dist\n    safe_idx = loss > dis_thresh\n    unsafe_idx = loss <= dis_thresh\n    loss[safe_idx] = 0\n    loss[unsafe_idx] = dis_thresh - loss[unsafe_idx]\n\n    for i in range(len(intersect_idx)):\n        loss[intersect_idx[i, 0], intersect_idx[i, 1]:] = 0\n\n    return loss\n\n\ndef segments_intersect(line1_start, line1_end, line2_start, line2_end):\n    # Calculating the differences\n    dx1 = line1_end[:, 0] - line1_start[:, 0]\n    dy1 = line1_end[:, 1] - line1_start[:, 1]\n    dx2 = line2_end[:, 0] - line2_start[:, 0]\n    dy2 = line2_end[:, 1] - line2_start[:, 1]\n\n    # Calculating determinants\n    det = dx1 * dy2 - dx2 * dy1\n    det_mask = det != 0\n\n    # Checking if lines are parallel or coincident\n    parallel_mask = torch.logical_not(det_mask)\n\n    # Calculating intersection parameters\n    t1 = ((line2_start[:, 0] - line1_start[:, 0]) * dy2 \n          - (line2_start[:, 1] - line1_start[:, 1]) * dx2) / det\n    t2 = ((line2_start[:, 0] - line1_start[:, 0]) * dy1 \n          - (line2_start[:, 1] - line1_start[:, 1]) * dx1) / det\n\n    # Checking intersection conditions\n    intersect_mask = torch.logical_and(\n        torch.logical_and(t1 >= 0, t1 <= 1),\n        torch.logical_and(t2 >= 0, t2 <= 1)\n    )\n\n    # Handling parallel or coincident lines\n    intersect_mask[parallel_mask] = False\n\n    return intersect_mask\n\n\n@LOSSES.register_module()\nclass PlanCollisionLoss(nn.Module):\n    \"\"\"Planning constraint to push ego vehicle away from other agents.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        agent_thresh (float, optional): confidence threshold to filter agent predictions.\n        x_dis_thresh (float, optional): distance threshold between ego and other agents in x-axis.\n        y_dis_thresh (float, optional): distance threshold between ego and other agents in y-axis.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        agent_thresh=0.5,\n        x_dis_thresh=3.0,\n        y_dis_thresh=1.5,\n        point_cloud_range = [-15.0, -30.0, -2.0, 15.0, 30.0, 2.0]\n    ):\n        super(PlanCollisionLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.agent_thresh = agent_thresh\n        self.x_dis_thresh = x_dis_thresh\n        self.y_dis_thresh = y_dis_thresh\n        self.pc_range = point_cloud_range\n\n    def forward(self,\n                ego_fut_preds,\n                agent_fut_preds,\n                agent_score_preds,\n                agent_fut_cls_preds,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            agent_preds (Tensor): [B, num_agent, 2]\n            agent_fut_preds (Tensor): [B, num_agent, fut_mode, fut_ts, 2]\n     
       agent_fut_cls_preds (Tensor): [B, num_agent, fut_mode]\n            agent_score_preds (Tensor): [B, num_agent]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        # filter agent element according to confidence score\n        # agent_max_score_preds, agent_max_score_idxs = agent_score_preds # .max(dim=-1)\n        not_valid_agent_mask = agent_score_preds < self.agent_thresh\n        # filter low confidence preds\n        agent_fut_preds[not_valid_agent_mask] = 1e6\n        # filter not vehicle preds\n        # not_veh_pred_mask = agent_max_score_idxs > 4  # veh idxs are 0-4\n        # agent_fut_preds[not_veh_pred_mask] = 1e6\n        # only use best mode pred\n        best_mode_idxs = torch.argmax(agent_fut_cls_preds, dim=-1).tolist()\n        batch_idxs = [[i] for i in range(agent_fut_cls_preds.shape[0])]\n        agent_num_idxs = [[i for i in range(agent_fut_cls_preds.shape[1])] for j in range(agent_fut_cls_preds.shape[0])]\n        agent_fut_preds = agent_fut_preds[batch_idxs, agent_num_idxs, best_mode_idxs]\n\n        loss_bbox = self.loss_weight * plan_col_loss(ego_fut_preds,\n                                                           target=agent_fut_preds, weight=weight,\n                                                           x_dis_thresh=self.x_dis_thresh,\n                                                           y_dis_thresh=self.y_dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_col_loss(\n    pred,\n    target,\n    x_dis_thresh=3.0,\n    y_dis_thresh=1.5,\n    dis_thresh=3.0\n):\n    \"\"\"Planning ego-agent collsion constraint.\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): agent_preds, [B, num_agent, 2].\n        agent_fut_preds (Tensor): [B, num_agent, fut_ts, 2].\n        weight (torch.Tensor): [B, fut_ts, 2].\n        x_dis_thresh (float, optional): distance threshold between ego and other agents in x-axis.\n        y_dis_thresh (float, optional): distance threshold between ego and other agents in y-axis.\n        dis_thresh (float, optional): distance threshold to filter distant agents.\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_mode, fut_ts, 2]\n    \"\"\"\n\n    pred = pred.cumsum(dim=-2)\n    # agent_fut_preds = agent_fut_preds.cumsum(dim=-2)\n    # target = target[:, :, None, :] + agent_fut_preds\n    # filter distant agents from ego vehicle\n\n    dist = torch.linalg.norm(pred[:, None, :, :] - target, dim=-1)\n    dist_mask = dist > dis_thresh\n    target[dist_mask] = 1e6\n\n    # [B, num_agent, fut_ts]\n    x_dist = torch.abs(pred[:, None, :, 0] - target[..., 0])\n    y_dist = torch.abs(pred[:, None, :, 1] - target[..., 1])\n    x_min_idxs = torch.argmin(x_dist, dim=1).tolist()\n    y_min_idxs = torch.argmin(y_dist, dim=1).tolist()\n    batch_idxs = [[i] for 
i in range(y_dist.shape[0])]\n    ts_idxs = [[i for i in range(y_dist.shape[-1])] for j in range(y_dist.shape[0])]\n\n    # [B, fut_ts]\n    x_min_dist = x_dist[batch_idxs, x_min_idxs, ts_idxs]\n    y_min_dist = y_dist[batch_idxs, y_min_idxs, ts_idxs]\n    x_loss = x_min_dist\n    safe_idx = x_loss > x_dis_thresh\n    unsafe_idx = x_loss <= x_dis_thresh\n    x_loss[safe_idx] = 0\n    x_loss[unsafe_idx] = x_dis_thresh - x_loss[unsafe_idx]\n    y_loss = y_min_dist\n    safe_idx = y_loss > y_dis_thresh\n    unsafe_idx = y_loss <= y_dis_thresh\n    y_loss[safe_idx] = 0\n    y_loss[unsafe_idx] = y_dis_thresh - y_loss[unsafe_idx]\n    loss = torch.cat([x_loss.unsqueeze(-1), y_loss.unsqueeze(-1)], dim=-1)\n\n    return loss\n\n\n@LOSSES.register_module()\nclass PlanMapDirectionLoss(nn.Module):\n    \"\"\"Planning loss to force the ego heading angle consistent with lane direction.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        theta_thresh (float, optional): angle diff thresh between ego and lane.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        map_thresh=0.5,\n        dis_thresh=2.0,\n        lane_div_cls_idx=1,\n        point_cloud_range = [-15.0, -30.0, -2.0, 15.0, 30.0, 2.0]\n    ):\n        super(PlanMapDirectionLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.map_thresh = map_thresh\n        self.dis_thresh = dis_thresh\n        self.lane_div_cls_idx = lane_div_cls_idx\n        self.pc_range = point_cloud_range\n\n    def forward(self,\n                ego_fut_preds,\n                lane_preds,\n                lane_score_preds,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            lane_preds (Tensor): [B, num_vec, num_pts, 2]\n            lane_score_preds (Tensor): [B, num_vec, 3]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        # filter lane element according to confidence score and class\n        not_lane_div_mask = lane_score_preds[..., self.lane_div_cls_idx] < self.map_thresh\n        # denormalize map pts\n        lane_div_preds = lane_preds.clone()\n        lane_div_preds[...,0:1] = (lane_div_preds[..., 0:1] * (self.pc_range[3] -\n                                self.pc_range[0]) + self.pc_range[0])\n        lane_div_preds[...,1:2] = (lane_div_preds[..., 1:2] * (self.pc_range[4] -\n                                self.pc_range[1]) + self.pc_range[1])\n        # pad not-lane-divider cls and low confidence preds\n        lane_div_preds[not_lane_div_mask] = 1e6\n\n        loss_bbox = self.loss_weight * plan_map_dir_loss(ego_fut_preds, lane_div_preds,\n                                                           weight=weight, dis_thresh=self.dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_map_dir_loss(pred, target, dis_thresh=2.0):\n    \"\"\"Planning ego-map directional loss.\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): lane_div_preds, [B, num_vec, num_pts, 2].\n        weight (torch.Tensor): [B, fut_ts]\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_ts]\n    \"\"\"\n    num_map_pts = target.shape[2]\n    pred = pred.cumsum(dim=-2)\n    traj_dis = torch.linalg.norm(pred[:, -1, :] - pred[:, 0, :], dim=-1)\n    static_mask = traj_dis < 1.0\n    target = target.unsqueeze(1).repeat(1, pred.shape[1], 1, 1, 1)\n\n    # find the closest map instance for ego at each timestamp\n    dist = torch.linalg.norm(pred[:, :, None, None, :] - target, dim=-1)\n    dist = dist.min(dim=-1, keepdim=False)[0]\n    min_inst_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    target_map_inst = target[batch_idxs, ts_idxs, min_inst_idxs]  # [B, fut_ts, num_pts, 2]\n\n    # calculate distance\n    dist = torch.linalg.norm(pred[:, :, None, :] - target_map_inst, dim=-1)\n    min_pts_idxs = torch.argmin(dist, dim=-1)\n    min_pts_next_idxs = min_pts_idxs.clone()\n    is_end_point = (min_pts_next_idxs == num_map_pts-1)\n    not_end_point = (min_pts_next_idxs != num_map_pts-1)\n    min_pts_next_idxs[is_end_point] = num_map_pts - 2\n    min_pts_next_idxs[not_end_point] = min_pts_next_idxs[not_end_point] + 1\n    min_pts_idxs = min_pts_idxs.tolist()\n    min_pts_next_idxs = min_pts_next_idxs.tolist()\n    traj_yaw = torch.atan2(torch.diff(pred[..., 1]), torch.diff(pred[..., 0]))  # [B, fut_ts-1]\n    # last ts yaw assume same as previous\n    traj_yaw = torch.cat([traj_yaw, traj_yaw[:, [-1]]], dim=-1)  # [B, fut_ts]\n    min_pts = target_map_inst[batch_idxs, ts_idxs, min_pts_idxs]\n    dist = torch.linalg.norm(min_pts - pred, dim=-1)\n    dist_mask = dist > dis_thresh\n    min_pts = min_pts.unsqueeze(2)\n    min_pts_next = target_map_inst[batch_idxs, ts_idxs, min_pts_next_idxs].unsqueeze(2)\n    
map_pts = torch.cat([min_pts, min_pts_next], dim=2)\n    lane_yaw = torch.atan2(torch.diff(map_pts[..., 1]).squeeze(-1), torch.diff(map_pts[..., 0]).squeeze(-1))  # [B, fut_ts]\n    yaw_diff = traj_yaw - lane_yaw\n    yaw_diff[yaw_diff > math.pi] =  yaw_diff[yaw_diff > math.pi] - math.pi\n    yaw_diff[yaw_diff > math.pi/2] = yaw_diff[yaw_diff > math.pi/2] - math.pi\n    yaw_diff[yaw_diff < -math.pi] = yaw_diff[yaw_diff < -math.pi] + math.pi\n    yaw_diff[yaw_diff < -math.pi/2] = yaw_diff[yaw_diff < -math.pi/2] + math.pi\n    yaw_diff[dist_mask] = 0  # loss = 0 if no lane around ego\n    yaw_diff[static_mask] = 0  # loss = 0 if ego is static\n\n    loss = torch.abs(yaw_diff)\n\n    return loss  # [B, fut_ts]\n\n\n\n@LOSSES.register_module()\nclass PlanMapDirectionLoss2(nn.Module):\n    \"\"\"Planning loss to force the ego heading angle consistent with lane direction.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        theta_thresh (float, optional): angle diff thresh between ego and lane.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        map_thresh=0.5,\n        dis_thresh=2.0,\n        lane_div_cls_idx=1,\n        point_cloud_range = [-15.0, -30.0, -2.0, 15.0, 30.0, 2.0]\n    ):\n        super(PlanMapDirectionLoss2, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.map_thresh = map_thresh\n        self.dis_thresh = dis_thresh\n        self.lane_div_cls_idx = lane_div_cls_idx\n        self.pc_range = point_cloud_range\n\n    def forward(self,\n                ego_fut_preds,\n                lane_preds,\n                lane_score_preds,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            lane_preds (Tensor): [B, num_vec, num_pts, 2]\n            lane_score_preds (Tensor): [B, num_vec, 3]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        # filter lane element according to confidence score and class\n        not_lane_div_mask = lane_score_preds[..., self.lane_div_cls_idx] < self.map_thresh\n        # denormalize map pts\n        lane_div_preds = lane_preds.clone()\n        lane_div_preds[...,0:1] = (lane_div_preds[..., 0:1] * (self.pc_range[3] -\n                                self.pc_range[0]) + self.pc_range[0])\n        lane_div_preds[...,1:2] = (lane_div_preds[..., 1:2] * (self.pc_range[4] -\n                                self.pc_range[1]) + self.pc_range[1])\n        # pad not-lane-divider cls and low confidence preds\n        lane_div_preds[not_lane_div_mask] = 1e6\n\n        loss_bbox = self.loss_weight * plan_map_dir_loss2(ego_fut_preds, lane_div_preds,\n                                                           weight=weight, dis_thresh=self.dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_map_dir_loss2(pred, target, dis_thresh=2.0):\n    \"\"\"Planning ego-map directional loss.\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): lane_div_preds, [B, num_vec, num_pts, 2].\n        weight (torch.Tensor): [B, fut_ts]\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_ts]\n    \"\"\"\n    num_map_pts = target.shape[2]\n    pred = pred.cumsum(dim=-2)\n    traj_dis = torch.linalg.norm(pred[:, -1, :] - pred[:, 0, :], dim=-1)\n    static_mask = traj_dis < 1.0\n    target = target.unsqueeze(1).repeat(1, pred.shape[1], 1, 1, 1)\n\n    # find the closest map instance for ego at each timestamp\n    dist = torch.linalg.norm(pred[:, :, None, None, :] - target, dim=-1)\n    dist = dist.min(dim=-1, keepdim=False)[0]\n    min_inst_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    target_map_inst = target[batch_idxs, ts_idxs, min_inst_idxs]  # [B, fut_ts, num_pts, 2]\n\n    # calculate distance\n    dist = torch.linalg.norm(pred[:, :, None, :] - target_map_inst, dim=-1)\n    min_pts_idxs = torch.argmin(dist, dim=-1)\n    min_pts_next_idxs = min_pts_idxs.clone()\n    is_end_point = (min_pts_next_idxs == num_map_pts-1)\n    not_end_point = (min_pts_next_idxs != num_map_pts-1)\n    min_pts_next_idxs[is_end_point] = num_map_pts - 2\n    min_pts_next_idxs[not_end_point] = min_pts_next_idxs[not_end_point] + 1\n    min_pts_idxs = min_pts_idxs.tolist()\n    min_pts_next_idxs = min_pts_next_idxs.tolist()\n    traj_yaw = torch.atan2(torch.diff(pred[..., 0]), torch.diff(pred[..., 1]))  # [B, fut_ts-1]\n    # last ts yaw assume same as previous\n    traj_yaw = torch.cat([traj_yaw, traj_yaw[:, [-1]]], dim=-1)  # [B, fut_ts]\n    min_pts = target_map_inst[batch_idxs, ts_idxs, min_pts_idxs]\n    dist = torch.linalg.norm(min_pts - pred, dim=-1)\n    dist_mask = dist > dis_thresh\n    min_pts = min_pts.unsqueeze(2)\n    min_pts_next = target_map_inst[batch_idxs, ts_idxs, min_pts_next_idxs].unsqueeze(2)\n  
  map_pts = torch.cat([min_pts, min_pts_next], dim=2)\n    lane_yaw = torch.atan2(torch.diff(map_pts[..., 0]).squeeze(-1), torch.diff(map_pts[..., 1]).squeeze(-1))  # [B, fut_ts]\n    yaw_diff = traj_yaw - lane_yaw\n    yaw_diff[yaw_diff > math.pi] =  yaw_diff[yaw_diff > math.pi] - math.pi\n    yaw_diff[yaw_diff > math.pi/2] = yaw_diff[yaw_diff > math.pi/2] - math.pi\n    yaw_diff[yaw_diff < -math.pi] = yaw_diff[yaw_diff < -math.pi] + math.pi\n    yaw_diff[yaw_diff < -math.pi/2] = yaw_diff[yaw_diff < -math.pi/2] + math.pi\n    yaw_diff[dist_mask] = 0  # loss = 0 if no lane around ego\n    yaw_diff[static_mask] = 0  # loss = 0 if ego is static\n\n    loss = torch.abs(yaw_diff)\n\n    return loss  # [B, fut_ts]"
  },
  {
    "path": "mmdet3d/models/fbbev/planner_head/plan_loss_gt.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\nimport math\nimport mmcv\nimport torch\nfrom torch import nn as nn\nfrom mmdet.models import weighted_loss\nfrom mmdet.models.builder import LOSSES\n\n@LOSSES.register_module()\nclass PlanMapBoundLoss_gt(nn.Module):\n    \"\"\"Planning constraint to push ego vehicle away from the lane boundary.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        map_thresh (float, optional): confidence threshold to filter map predictions.\n        lane_bound_cls_idx (float, optional): lane_boundary class index.\n        dis_thresh (float, optional): distance threshold between ego vehicle and lane bound.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        map_thresh=0.5,\n        lane_bound_cls_idx=2,\n        dis_thresh=1.0,\n        point_cloud_range=[-15.0, -30.0, -2.0, 15.0, 30.0, 2.0],\n        perception_detach=False\n    ):\n        super(PlanMapBoundLoss_gt, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.map_thresh = map_thresh\n        self.lane_bound_cls_idx = lane_bound_cls_idx\n        self.dis_thresh = dis_thresh\n        self.pc_range = point_cloud_range\n        self.perception_detach = perception_detach\n\n    def forward(self,\n                ego_fut_preds,\n                lane_gt,\n                lane_labels,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            lane_preds (Tensor): [B, num_vec, num_pts, 2]\n            lane_score_preds (Tensor): [B, num_vec, 3]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n\n        # filter lane element according to confidence score and class\n        not_lane_bound_mask = lane_labels != self.lane_bound_cls_idx\n        # denormalize map pts\n        lane_bound_preds = lane_gt.clone()\n        lane_bound_preds[...,0:1] = (lane_bound_preds[..., 0:1] * (self.pc_range[3] -\n                                self.pc_range[0]) + self.pc_range[0])\n        lane_bound_preds[...,1:2] = (lane_bound_preds[..., 1:2] * (self.pc_range[4] -\n                                self.pc_range[1]) + self.pc_range[1])\n        # pad not-lane-boundary cls and low confidence preds\n        lane_bound_preds[not_lane_bound_mask] = 1e6\n\n        loss_bbox = self.loss_weight * plan_map_bound_loss(ego_fut_preds, lane_bound_preds,\n                                                           weight=weight, dis_thresh=self.dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_map_bound_loss(pred, target, dis_thresh=1.0):\n    \"\"\"Planning map bound constraint (L1 distance).\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): lane_bound_preds, [B, num_vec, num_pts, 2].\n        weight (torch.Tensor): [B, fut_ts]\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_ts]\n    \"\"\"\n    pred = pred.cumsum(dim=-2)\n    ego_traj_starts = pred[:, :-1, :]\n    ego_traj_ends = pred\n    B, T, _ = ego_traj_ends.size()\n    padding_zeros = torch.zeros((B, 1, 2), dtype=pred.dtype, device=pred.device)  # initial position\n    ego_traj_starts = torch.cat((padding_zeros, ego_traj_starts), dim=1)\n    _, V, P, _ = target.size()\n    ego_traj_expanded = ego_traj_ends.unsqueeze(2).unsqueeze(3)  # [B, T, 1, 1, 2]\n    maps_expanded = target.unsqueeze(1)  # [1, 1, M, P, 2]\n    dist = torch.linalg.norm(ego_traj_expanded - maps_expanded, dim=-1)  # [B, T, M, P]\n    dist = dist.min(dim=-1, keepdim=False)[0]\n    min_inst_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    bd_target = target.unsqueeze(1).repeat(1, pred.shape[1], 1, 1, 1)\n    min_bd_insts = bd_target[batch_idxs, ts_idxs, min_inst_idxs]  # [B, T, P, 2]\n    bd_inst_starts = min_bd_insts[:, :, :-1, :].flatten(0, 2)\n    bd_inst_ends = min_bd_insts[:, :, 1:, :].flatten(0, 2)\n    ego_traj_starts = ego_traj_starts.unsqueeze(2).repeat(1, 1, P-1, 1).flatten(0, 2)\n    ego_traj_ends = ego_traj_ends.unsqueeze(2).repeat(1, 1, P-1, 1).flatten(0, 2)\n\n    intersect_mask = segments_intersect(ego_traj_starts, ego_traj_ends,\n                                        bd_inst_starts, bd_inst_ends)\n    intersect_mask = intersect_mask.reshape(B, T, P-1)\n    intersect_mask = intersect_mask.any(dim=-1)\n    intersect_idx = (intersect_mask == True).nonzero()\n\n    target = target.view(target.shape[0], -1, target.shape[-1])\n    # [B, fut_ts, num_vec*num_pts]\n    dist = torch.linalg.norm(pred[:, :, None, :] - target[:, None, :, :], 
dim=-1)\n    min_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    min_dist = dist[batch_idxs, ts_idxs, min_idxs]\n    loss = min_dist\n    safe_idx = loss > dis_thresh\n    unsafe_idx = loss <= dis_thresh\n    loss[safe_idx] = 0\n    loss[unsafe_idx] = dis_thresh - loss[unsafe_idx]\n\n    for i in range(len(intersect_idx)):\n        loss[intersect_idx[i, 0], intersect_idx[i, 1]:] = 0\n\n    return loss\n\n\ndef segments_intersect(line1_start, line1_end, line2_start, line2_end):\n    # Calculating the differences\n    dx1 = line1_end[:, 0] - line1_start[:, 0]\n    dy1 = line1_end[:, 1] - line1_start[:, 1]\n    dx2 = line2_end[:, 0] - line2_start[:, 0]\n    dy2 = line2_end[:, 1] - line2_start[:, 1]\n\n    # Calculating determinants\n    det = dx1 * dy2 - dx2 * dy1\n    det_mask = det != 0\n\n    # Checking if lines are parallel or coincident\n    parallel_mask = torch.logical_not(det_mask)\n\n    # Calculating intersection parameters\n    t1 = ((line2_start[:, 0] - line1_start[:, 0]) * dy2 \n          - (line2_start[:, 1] - line1_start[:, 1]) * dx2) / det\n    t2 = ((line2_start[:, 0] - line1_start[:, 0]) * dy1 \n          - (line2_start[:, 1] - line1_start[:, 1]) * dx1) / det\n\n    # Checking intersection conditions\n    intersect_mask = torch.logical_and(\n        torch.logical_and(t1 >= 0, t1 <= 1),\n        torch.logical_and(t2 >= 0, t2 <= 1)\n    )\n\n    # Handling parallel or coincident lines\n    intersect_mask[parallel_mask] = False\n\n    return intersect_mask\n\n\n@LOSSES.register_module()\nclass PlanCollisionLoss_gt(nn.Module):\n    \"\"\"Planning constraint to push ego vehicle away from other agents.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        agent_thresh (float, optional): confidence threshold to filter agent predictions.\n        x_dis_thresh (float, optional): distance threshold between ego and other agents in x-axis.\n        y_dis_thresh (float, optional): distance threshold between ego and other agents in y-axis.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        agent_thresh=0.5,\n        x_dis_thresh=3.0,\n        y_dis_thresh=1.5,\n        point_cloud_range = [-15.0, -30.0, -2.0, 15.0, 30.0, 2.0]\n    ):\n        super(PlanCollisionLoss_gt, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.agent_thresh = agent_thresh\n        self.x_dis_thresh = x_dis_thresh\n        self.y_dis_thresh = y_dis_thresh\n        self.pc_range = point_cloud_range\n\n    def forward(self,\n                ego_fut_preds,\n                agent_fut_preds,\n                # agent_score_preds,\n                # agent_fut_cls_preds,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            agent_preds (Tensor): [B, num_agent, 2]\n            agent_fut_preds (Tensor): [B, num_agent, fut_ts, 2]\n            agent_fut_cls_preds (Tensor): [B, num_agent, fut_mode]\n            agent_score_preds (Tensor): [B, num_agent]\n            weight (torch.Tensor, optional): The 
weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        # filter agent element according to confidence score\n        # agent_max_score_preds, agent_max_score_idxs = agent_score_preds # .max(dim=-1)\n        # not_valid_agent_mask = agent_score_preds < self.agent_thresh\n        # filter low confidence preds\n        # agent_fut_preds[not_valid_agent_mask] = 1e6\n        # filter not vehicle preds\n        # not_veh_pred_mask = agent_max_score_idxs > 4  # veh idxs are 0-4\n        # agent_fut_preds[not_veh_pred_mask] = 1e6\n        # only use best mode pred\n        # best_mode_idxs = torch.argmax(agent_fut_cls_preds, dim=-1).tolist()\n        # batch_idxs = [[i] for i in range(agent_fut_cls_preds.shape[0])]\n        # agent_num_idxs = [[i for i in range(agent_fut_cls_preds.shape[1])] for j in range(agent_fut_cls_preds.shape[0])]\n        # agent_fut_preds = agent_fut_preds[batch_idxs, agent_num_idxs, best_mode_idxs]\n\n        loss_bbox = self.loss_weight * plan_col_loss(ego_fut_preds,\n                                                           target=agent_fut_preds, weight=weight,\n                                                           x_dis_thresh=self.x_dis_thresh,\n                                                           y_dis_thresh=self.y_dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_col_loss(\n    pred,\n    target,\n    x_dis_thresh=3.0,\n    y_dis_thresh=1.5,\n    dis_thresh=3.0\n):\n    \"\"\"Planning ego-agent collision constraint.\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): agent_fut_preds, [B, num_agent, fut_ts, 2].\n        weight (torch.Tensor): [B, fut_ts, 2].\n        x_dis_thresh (float, optional): distance threshold between ego and other agents in x-axis.\n        y_dis_thresh (float, optional): distance threshold between ego and other agents in y-axis.\n        dis_thresh (float, optional): distance threshold to filter distant agents.\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_ts, 2]\n    \"\"\"\n    pred = pred.cumsum(dim=-2)\n    # agent_fut_preds = agent_fut_preds.cumsum(dim=-2)\n    # target = target[:, :, None, :] + agent_fut_preds\n    # filter distant agents from ego vehicle\n\n    dist = torch.linalg.norm(pred[:, None, :, :] - target, dim=-1)\n    dist_mask = dist > dis_thresh\n    target[dist_mask] = 1e6\n\n    # [B, num_agent, fut_ts]\n    x_dist = torch.abs(pred[:, None, :, 0] - target[..., 0])\n    y_dist = torch.abs(pred[:, None, :, 1] - target[..., 1])\n    x_min_idxs = torch.argmin(x_dist, dim=1).tolist()\n    y_min_idxs = torch.argmin(y_dist, dim=1).tolist()\n    batch_idxs = [[i] for i in range(y_dist.shape[0])]\n    ts_idxs = [[i for i in range(y_dist.shape[-1])] for j in range(y_dist.shape[0])]\n\n    # [B, fut_ts]\n    x_min_dist = 
x_dist[batch_idxs, x_min_idxs, ts_idxs]\n    y_min_dist = y_dist[batch_idxs, y_min_idxs, ts_idxs]\n    x_loss = x_min_dist\n    safe_idx = x_loss > x_dis_thresh\n    unsafe_idx = x_loss <= x_dis_thresh\n    x_loss[safe_idx] = 0\n    x_loss[unsafe_idx] = x_dis_thresh - x_loss[unsafe_idx]\n    y_loss = y_min_dist\n    safe_idx = y_loss > y_dis_thresh\n    unsafe_idx = y_loss <= y_dis_thresh\n    y_loss[safe_idx] = 0\n    y_loss[unsafe_idx] = y_dis_thresh - y_loss[unsafe_idx]\n    loss = torch.cat([x_loss.unsqueeze(-1), y_loss.unsqueeze(-1)], dim=-1)\n\n    return loss\n\n\n@LOSSES.register_module()\nclass PlanMapDirectionLoss_gt(nn.Module):\n    \"\"\"Planning loss to force the ego heading angle consistent with lane direction.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n        theta_thresh (float, optional): angle diff thresh between ego and lane.\n        point_cloud_range (list, optional): point cloud range.\n    \"\"\"\n\n    def __init__(\n        self,\n        reduction='mean',\n        loss_weight=1.0,\n        map_thresh=0.5,\n        dis_thresh=2.0,\n        lane_div_cls_idx=1,\n        point_cloud_range = [-15.0, -30.0, -2.0, 15.0, 30.0, 2.0]\n    ):\n        super(PlanMapDirectionLoss_gt, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.map_thresh = map_thresh\n        self.dis_thresh = dis_thresh\n        self.lane_div_cls_idx = lane_div_cls_idx\n        self.pc_range = point_cloud_range\n\n    def forward(self,\n                ego_fut_preds,\n                lane_gt,\n                lane_labels,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            ego_fut_preds (Tensor): [B, fut_ts, 2]\n            lane_preds (Tensor): [B, num_vec, num_pts, 2]\n            lane_score_preds (Tensor): [B, num_vec, 3]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        # filter lane element according to confidence score and class\n        not_lane_div_mask = lane_labels !=self.lane_div_cls_idx\n        # denormalize map pts\n        lane_div_preds = lane_gt.clone()\n        lane_div_preds[...,0:1] = (lane_div_preds[..., 0:1] * (self.pc_range[3] -\n                                self.pc_range[0]) + self.pc_range[0])\n        lane_div_preds[...,1:2] = (lane_div_preds[..., 1:2] * (self.pc_range[4] -\n                                self.pc_range[1]) + self.pc_range[1])\n        # pad not-lane-divider cls and low confidence preds\n        lane_div_preds[not_lane_div_mask] = 1e6\n\n        loss_bbox = self.loss_weight * plan_map_dir_loss(ego_fut_preds, lane_div_preds,\n                                                           weight=weight, dis_thresh=self.dis_thresh,\n                                                           reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef plan_map_dir_loss(pred, target, dis_thresh=2.0):\n    \"\"\"Planning ego-map directional loss.\n\n    Args:\n        pred (torch.Tensor): ego_fut_preds, [B, fut_ts, 2].\n        target (torch.Tensor): lane_div_preds, [B, num_vec, num_pts, 2].\n        weight (torch.Tensor): [B, fut_ts]\n\n    Returns:\n        torch.Tensor: Calculated loss [B, fut_ts]\n    \"\"\"\n    num_map_pts = target.shape[2]\n    pred = pred.cumsum(dim=-2)\n    traj_dis = torch.linalg.norm(pred[:, -1, :] - pred[:, 0, :], dim=-1)\n    static_mask = traj_dis < 1.0\n    target = target.unsqueeze(1).repeat(1, pred.shape[1], 1, 1, 1)\n\n    # find the closest map instance for ego at each timestamp\n    dist = torch.linalg.norm(pred[:, :, None, None, :] - target, dim=-1)\n    dist = dist.min(dim=-1, keepdim=False)[0]\n    min_inst_idxs = torch.argmin(dist, dim=-1).tolist()\n    batch_idxs = [[i] for i in range(dist.shape[0])]\n    ts_idxs = [[i for i in range(dist.shape[1])] for j in range(dist.shape[0])]\n    target_map_inst = target[batch_idxs, ts_idxs, min_inst_idxs]  # [B, fut_ts, num_pts, 2]\n\n    # calculate distance\n    dist = torch.linalg.norm(pred[:, :, None, :] - target_map_inst, dim=-1)\n    min_pts_idxs = torch.argmin(dist, dim=-1)\n    min_pts_next_idxs = min_pts_idxs.clone()\n    is_end_point = (min_pts_next_idxs == num_map_pts-1)\n    not_end_point = (min_pts_next_idxs != num_map_pts-1)\n    min_pts_next_idxs[is_end_point] = num_map_pts - 2\n    min_pts_next_idxs[not_end_point] = min_pts_next_idxs[not_end_point] + 1\n    min_pts_idxs = min_pts_idxs.tolist()\n    min_pts_next_idxs = min_pts_next_idxs.tolist()\n    traj_yaw = torch.atan2(torch.diff(pred[..., 1]), torch.diff(pred[..., 0]))  # [B, fut_ts-1]\n    # last ts yaw assume same as previous\n    traj_yaw = torch.cat([traj_yaw, traj_yaw[:, [-1]]], dim=-1)  # [B, fut_ts]\n    min_pts = target_map_inst[batch_idxs, ts_idxs, min_pts_idxs]\n    dist = torch.linalg.norm(min_pts - pred, dim=-1)\n    dist_mask = dist > dis_thresh\n    min_pts = min_pts.unsqueeze(2)\n    min_pts_next = target_map_inst[batch_idxs, ts_idxs, min_pts_next_idxs].unsqueeze(2)\n    map_pts = torch.cat([min_pts, 
min_pts_next], dim=2)\n    lane_yaw = torch.atan2(torch.diff(map_pts[..., 1]).squeeze(-1), torch.diff(map_pts[..., 0]).squeeze(-1))  # [B, fut_ts]\n    yaw_diff = traj_yaw - lane_yaw\n    yaw_diff[yaw_diff > math.pi] =  yaw_diff[yaw_diff > math.pi] - math.pi\n    yaw_diff[yaw_diff > math.pi/2] = yaw_diff[yaw_diff > math.pi/2] - math.pi\n    yaw_diff[yaw_diff < -math.pi] = yaw_diff[yaw_diff < -math.pi] + math.pi\n    yaw_diff[yaw_diff < -math.pi/2] = yaw_diff[yaw_diff < -math.pi/2] + math.pi\n    yaw_diff[dist_mask] = 0  # loss = 0 if no lane around ego\n    yaw_diff[static_mask] = 0  # loss = 0 if ego is static\n\n    loss = torch.abs(yaw_diff)\n\n    return loss  # [B, fut_ts]\n\n\n"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/CustomMSDeformableAttention.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nfrom mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch\nimport mmcv\nimport cv2 as cv\nimport copy\nimport warnings\nfrom matplotlib import pyplot as plt\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init, constant_init\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import TransformerLayerSequence\nimport math\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\nfrom mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,\n                        to_2tuple)\n\nfrom mmcv.utils import ext_loader\nfrom mmcv.ops.multi_scale_deform_attn import (MultiScaleDeformableAttnFunction,\n                                              multi_scale_deformable_attn_pytorch)\nfrom .fp16_dattn import MultiScaleDeformableAttnFunctionFp32\n\n@ATTENTION.register_module()\nclass CustomMSDeformableAttention(BaseModule):\n    \"\"\"An attention module used in Deformable-Detr.\n\n    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\n\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 64.\n        num_levels (int): The number of feature map used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 4.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to False.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=4,\n                 im2col_step=64,\n                 dropout=0.1,\n                 use_sampling_offsets=True,\n                 batch_first=False,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.dropout = nn.Dropout(dropout)\n        self.batch_first = batch_first\n        self.fp16_enabled = False\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        self.use_sampling_offsets = use_sampling_offsets\n        if use_sampling_offsets:\n            self.sampling_offsets = nn.Linear(\n                embed_dims, num_heads * num_levels * num_points * 2)\n        self.attention_weights = nn.Linear(embed_dims,\n                                           num_heads * num_levels * num_points)\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        if self.use_sampling_offsets:\n            constant_init(self.sampling_offsets, 0.)\n            thetas = torch.arange(\n                self.num_heads,\n                dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n            grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n            grid_init = (grid_init /\n                        grid_init.abs().max(-1, keepdim=True)[0]).view(\n                self.num_heads, 1, 1,\n                2).repeat(1, self.num_levels, self.num_points, 1)\n            for i in range(self.num_points):\n                grid_init[:, :, i, :] *= i + 1\n\n            self.sampling_offsets.bias.data = grid_init.view(-1)\n        constant_init(self.attention_weights, val=0., bias=0.)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    @deprecated_api_warning({'residual': 'identity'},\n                            
cls_name='MultiScaleDeformableAttention')\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                flag='decoder',\n                **kwargs):\n        \"\"\"Forward Function of MultiScaleDeformAttention.\n\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (num_query, bs, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(num_key, bs, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(num_key, bs, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`. Default\n                None.\n            reference_points (Tensor):  The normalized reference\n                points with shape (bs, num_query, num_levels, num_points, 2),\n                all elements is range in [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n\n        Returns:\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            value = query\n\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n        if not self.batch_first:\n            # change to (bs, num_query ,embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n\n        bs, num_query, _ = query.shape\n        bs, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n\n        value = self.value_proj(value)\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n        value = value.view(bs, num_value, self.num_heads, -1)\n\n        if self.use_sampling_offsets:\n            sampling_offsets = self.sampling_offsets(query).view(\n                bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)\n        else:\n            sampling_offsets = query.new_zeros((bs, num_query, self.num_heads, self.num_levels, self.num_points, 2))\n        \n        attention_weights = self.attention_weights(query).view(\n            bs, num_query, self.num_heads, self.num_levels * self.num_points)\n        attention_weights = attention_weights.softmax(-1)\n\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   
self.num_levels,\n                                                   self.num_points)\n        \n        # TODO: try remove sampling offsets\n        offset_normalizer = torch.stack(\n            [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) # changed to (h, w)\n        _, _, num_points, _ = reference_points.shape\n        # (bs, num_queries, num_pts, 2) ->\n        # (bs, num_queries, num_heads, num_lvls, num_pts, 2) \n        reference_points = reference_points[:, :, None, None, :, :]\n        # reference_points[..., 1:2] = -reference_points[..., 1:2]\n        sampling_locations = reference_points + \\\n            (sampling_offsets # (bs, num_queries, num_heads, num_lvls, num_pts, 2) \n            / offset_normalizer[None, None, None, :, None, :])\n        assert list(sampling_locations.shape) == [bs, num_query, self.num_heads, self.num_levels, num_points, 2]\n        \n        if torch.cuda.is_available() and value.is_cuda:\n            # using fp16 deformable attention is unstable because it performs many sum operations\n            output = MultiScaleDeformableAttnFunctionFp32.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n\n        output = self.output_proj(output)\n\n        if not self.batch_first:\n            # (num_query, bs ,embed_dims)\n            output = output.permute(1, 0, 2)\n\n        return self.dropout(output) + identity"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/__init__.py",
    "content": "from .cost import *\nfrom .hungarian_lines_assigner import *\nfrom .loss import *\nfrom .streammapnet_head import MapDetectorHead\nfrom .transformer import MapTransformerDecoder_new, MapTransformerLayer, MapTransformer\n"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/cost.py",
    "content": "import torch\nfrom mmdet.core.bbox.match_costs.builder import MATCH_COST\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom torch.nn.functional import smooth_l1_loss\n\nfrom mmdet.core.bbox.iou_calculators import bbox_overlaps\nfrom mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy\ndef chamfer_distance(line1, line2) -> float:\n    ''' Calculate chamfer distance between two lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        line1 (tensor): shape (num_pts, 2)\n        line2 (tensor): shape (num_pts, 2)\n    \n    Returns:\n        distance (float): chamfer distance\n    '''\n    \n    dist_matrix = torch.cdist(line1, line2, p=2)\n    dist12 = dist_matrix.min(-1)[0].sum() / len(line1)\n    dist21 = dist_matrix.min(-2)[0].sum() / len(line2)\n\n    return (dist12 + dist21) / 2\n\n\n@MATCH_COST.register_module()\nclass ClsSigmoidCost:\n    \"\"\"ClsSoftmaxCost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def __call__(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classification logits, shape\n                [num_query, num_class].\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n        Returns:\n            torch.Tensor: cls_cost value with weight\n        \"\"\"\n        # Following the official DETR repo, contrary to the loss that\n        # NLL is used, we approximate it in 1 - cls_score[gt_label].\n        # The 1 is a constant that doesn't change the matching,\n        # so it can be omitted.\n        cls_score = cls_pred.sigmoid()\n        cls_cost = -cls_score[:, gt_labels]\n        return cls_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass LinesFixNumChamferCost(object):\n    \"\"\"BBox3DL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.0, permute=False):\n        self.weight = weight\n        self.permute = permute\n\n    def __call__(self, lines_pred, gt_lines):\n        \"\"\"\n        Args:\n            lines_pred (Tensor): predicted normalized lines:\n                [num_query, 2*num_points]\n            gt_lines (Tensor): Ground truth lines\n                [num_gt, 2*num_points] or [num_gt, num_permute, 2*num_points]\n        Returns:\n            torch.Tensor: reg_cost value with weight\n                shape [num_pred, num_gt]\n        \"\"\"\n\n        if self.permute:\n            assert len(gt_lines.shape) == 3\n        else:\n            assert len(gt_lines.shape) == 2\n        \n        num_gt, num_pred = len(gt_lines), len(lines_pred)\n        if self.permute:\n            gt_lines = gt_lines.flatten(0, 1) # (num_gt*num_permute, 2*num_pts)\n\n        num_pts = lines_pred.shape[-1] // 2\n        lines_pred = lines_pred.view(-1, 2) # [num_query*num_points, 2]\n        gt_lines = gt_lines.view(-1, 2) # [num_gt*num_points, 2]\n        \n        dist_mat = torch.cdist(lines_pred, gt_lines, p=2) # (num_query*num_points, num_gt*num_points)\n        dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=-1)) # (num_gt, num_query*num_points, num_pts)\n        dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=1)) # (num_q, num_gt, num_pts, num_pts)\n\n        dist1 = dist_mat.min(-1)[0].sum(-1)\n        dist2 = dist_mat.min(-2)[0].sum(-1)\n\n        dist_mat = (dist1 + dist2) / (2 * num_pts) # (num_pred, num_gt)\n\n        if self.permute:\n        
    # dist_mat: (num_pred, num_gt*num_permute)\n            dist_mat = dist_mat.view(num_pred, num_gt, -1) # (num_pred, num_gt, num_permute)\n            dist_mat, gt_permute_index = dist_mat.min(-1)\n            return dist_mat * self.weight, gt_permute_index\n\n        return dist_mat * self.weight\n\n\n@MATCH_COST.register_module()\nclass LinesL1Cost(object):\n    \"\"\"LinesL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.0, beta=0.0, permute=False):\n        self.weight = weight\n        self.permute = permute\n        self.beta = beta\n\n    def __call__(self, lines_pred, gt_lines, **kwargs):\n        \"\"\"\n        Args:\n            lines_pred (Tensor): predicted normalized lines:\n                [num_query, num_points, 2] or [num_query, num_points * 2]\n            gt_lines (Tensor): Ground truth lines\n                [num_gt, num_points, 2] or [num_gt, num_permute, num_points, 2]\n        Returns:\n            torch.Tensor: reg_cost value with weight\n                shape [num_pred, num_gt]\n        \"\"\"\n\n        if self.permute:\n            assert len(gt_lines.shape) == 4\n        else:\n            assert len(gt_lines.shape) == 3\n        if lines_pred.dim() == 3:\n            lines_pred = lines_pred.flatten(-2, -1)\n        gt_lines = gt_lines.flatten(-2, -1)\n\n        num_pred, num_gt = len(lines_pred), len(gt_lines)\n        if self.permute:\n            # permute-invarint labels\n            gt_lines = gt_lines.flatten(0, 1) # (num_gt*num_permute, 2*num_pts)\n\n        num_pts = lines_pred.shape[-1]//2\n\n        if self.beta > 0:\n            lines_pred = lines_pred.unsqueeze(1).repeat(1, len(gt_lines), 1)\n            gt_lines = gt_lines.unsqueeze(0).repeat(num_pred, 1, 1)\n            dist_mat = smooth_l1_loss(lines_pred, gt_lines, reduction='none', beta=self.beta).sum(-1)\n        \n        else:\n            dist_mat = torch.cdist(lines_pred, gt_lines, p=1)\n\n        dist_mat = dist_mat / num_pts\n\n        if self.permute:\n            # dist_mat: (num_pred, num_gt*num_permute)\n            dist_mat = dist_mat.view(num_pred, num_gt, -1) # (num_pred, num_gt, num_permute)\n            dist_mat, gt_permute_index = torch.min(dist_mat, 2)\n            return dist_mat * self.weight, gt_permute_index\n        \n        return dist_mat * self.weight\n\n\n@MATCH_COST.register_module()\nclass BBoxCostC:\n    \"\"\"BBoxL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n         box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import BBoxL1Cost\n         >>> import torch\n         >>> self = BBoxL1Cost()\n         >>> bbox_pred = torch.rand(1, 4)\n         >>> gt_bboxes= torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])\n         >>> factor = torch.tensor([10, 8, 10, 8])\n         >>> self(bbox_pred, gt_bboxes, factor)\n         tensor([[1.6172, 1.6422]])\n    \"\"\"\n\n    def __init__(self, weight=1., box_format='xyxy'):\n        self.weight = weight\n        assert box_format in ['xyxy', 'xywh']\n        self.box_format = box_format\n\n    def __call__(self, bbox_pred, gt_bboxes):\n        \"\"\"\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. 
Shape\n                [num_query, 4].\n            gt_bboxes (Tensor): Ground truth boxes with normalized\n                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].\n        Returns:\n            torch.Tensor: bbox_cost value with weight\n        \"\"\"\n        # if self.box_format == 'xywh':\n        #     gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes)\n        # elif self.box_format == 'xyxy':\n        #     bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred)\n        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)\n        return bbox_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass IoUCostC:\n    \"\"\"IoUCost.\n     Args:\n         iou_mode (str, optional): iou mode such as 'iou' | 'giou'\n         weight (int | float, optional): loss weight\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import IoUCost\n         >>> import torch\n         >>> self = IoUCost()\n         >>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]])\n         >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])\n         >>> self(bboxes, gt_bboxes)\n         tensor([[-0.1250,  0.1667],\n                [ 0.1667, -0.5000]])\n    \"\"\"\n\n    def __init__(self, iou_mode='giou', weight=1., box_format='xywh'):\n        self.weight = weight\n        self.iou_mode = iou_mode\n        assert box_format in ['xyxy', 'xywh']\n        self.box_format = box_format\n\n    def __call__(self, bboxes, gt_bboxes):\n        \"\"\"\n        Args:\n            bboxes (Tensor): Predicted boxes with unnormalized coordinates\n                (x1, y1, x2, y2). Shape [num_query, 4].\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].\n        Returns:\n            torch.Tensor: iou_cost value with weight\n        \"\"\"\n        if self.box_format == 'xywh':\n            bboxes = bbox_cxcywh_to_xyxy(bboxes)\n            gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)\n\n        # overlaps: [num_bboxes, num_gt]\n        overlaps = bbox_overlaps(\n            bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)\n        # The 1 is a constant that doesn't change the matching, so omitted.\n        iou_cost = -overlaps\n        return iou_cost * self.weight\n\n@MATCH_COST.register_module()\nclass DynamicLinesCost(object):\n    \"\"\"LinesL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def __call__(self, lines_pred, lines_gt, masks_pred, masks_gt):\n        \"\"\"\n        Args:\n            lines_pred (Tensor): predicted normalized lines:\n                [nP, num_points, 2]\n            lines_gt (Tensor): Ground truth lines\n                [nG, num_points, 2]\n            masks_pred: [nP, num_points]\n            masks_gt: [nG, num_points]\n        Returns:\n            dist_mat: reg_cost value with weight\n                shape [nP, nG]\n        \"\"\"\n\n        dist_mat = self.cal_dist(lines_pred, lines_gt)\n\n        dist_mat = self.get_dynamic_line(dist_mat, masks_pred, masks_gt)\n\n        dist_mat = dist_mat * self.weight\n\n        return dist_mat\n\n    def cal_dist(self, x1, x2):\n        '''\n            Args:\n                x1: B1,N,2\n                x2: B2,N,2\n            Return:\n                dist_mat: B1,B2,N\n        '''\n        x1 = x1.permute(1, 0, 2)\n        x2 = x2.permute(1, 0, 2)\n\n        dist_mat = torch.cdist(x1, x2, p=2)\n\n        dist_mat = 
dist_mat.permute(1, 2, 0)\n\n        return dist_mat\n\n    def get_dynamic_line(self, mat, m1, m2):\n        '''\n            get dynamic line with difference approach\n            mat: N1xN2xnpts\n            m1: N1xnpts\n            m2: N2xnpts\n        '''\n\n        # nPxnGxnum_points\n        m1 = m1.unsqueeze(1).sigmoid() > 0.5\n        m2 = m2.unsqueeze(0)\n\n        valid_points_mask = (m1 + m2)/2.\n\n        average_factor_mask = valid_points_mask.sum(-1) > 0\n        average_factor = average_factor_mask.masked_fill(\n            ~average_factor_mask, 1)\n\n        # takes the average\n        mat = mat * valid_points_mask\n        mat = mat.sum(-1) / average_factor\n\n        return mat\n\n\n@MATCH_COST.register_module()\nclass BBoxLogitsCost(object):\n    \"\"\"BBoxLogits.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def calNLL(self, logits, value):\n        '''\n            Args:\n                logits: B1, 8, cls_dim\n                value: B2, 8,\n            Return:\n                log_likelihood: B1,B2,8\n        '''\n\n        logits = logits[:, None]\n        value = value[None]\n\n        value = value.long().unsqueeze(-1)\n        value, log_pmf = torch.broadcast_tensors(value, logits)\n        value = value[..., :1]\n        return log_pmf.gather(-1, value).squeeze(-1)\n\n    def __call__(self, bbox_pred, bbox_gt, **kwargs):\n        \"\"\"\n        Args:\n            bbox_pred: nproposal, 4*2, pos_dim\n            bbox_gt: ngt, 4*2\n        Returns:\n            cost: nproposal, ngt\n        \"\"\"\n\n        cost = self.calNLL(bbox_pred, bbox_gt).mean(-1)\n\n        return cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass MapQueriesCost(object):\n\n    def __init__(self, cls_cost, reg_cost, iou_cost=None):\n\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n\n        self.iou_cost = None\n        if iou_cost is not None:\n            self.iou_cost = build_match_cost(iou_cost)\n\n    def __call__(self, preds: dict, gts: dict):\n\n        # classification and bboxcost.\n        cls_cost = self.cls_cost(preds['scores'], gts['labels'])\n\n        # regression cost\n        regkwargs = {}\n        if 'masks' in preds and 'masks' in gts:\n            assert isinstance(self.reg_cost, DynamicLinesCost), ' Issues!!'\n            regkwargs = {\n                'masks_pred': preds['masks'],\n                'masks_gt': gts['masks'],\n            }\n\n        reg_cost = self.reg_cost(preds['lines'], gts['lines'], **regkwargs)\n        if self.reg_cost.permute:\n            reg_cost, gt_permute_idx = reg_cost\n\n        # weighted sum of above three costs\n        cost = cls_cost + reg_cost\n\n        # Iou\n        if self.iou_cost is not None:\n            iou_cost = self.iou_cost(preds['lines'],gts['lines'])\n            cost += iou_cost\n        \n        if self.reg_cost.permute:\n            return cost, gt_permute_idx\n        return cost"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/fp16_dattn.py",
    "content": "\nimport warnings\ntry:\n    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention\nexcept ImportError:\n    warnings.warn(\n        '`MultiScaleDeformableAttention` in MMCV has been moved to '\n        '`mmcv.ops.multi_scale_deform_attn`, please update your MMCV')\n    from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention\nfrom mmcv.runner import force_fp32, auto_fp16\nfrom mmcv.cnn.bricks.registry import ATTENTION\n\n\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\nfrom mmcv.cnn.bricks.transformer import build_attention\n\nimport math\nimport warnings\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd.function import Function, once_differentiable\n\nfrom mmcv import deprecated_api_warning\nfrom mmcv.cnn import constant_init, xavier_init\nfrom mmcv.cnn.bricks.registry import ATTENTION\nfrom mmcv.runner import BaseModule\nfrom mmcv.utils import ext_loader\next_module = ext_loader.load_ext(\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\nfrom torch.cuda.amp import custom_bwd, custom_fwd\n\n\n@ATTENTION.register_module()\nclass MultiScaleDeformableAttentionFp16(BaseModule):\n\n    def __init__(self, attn_cfg=None,init_cfg=None,**kwarg):\n        super(MultiScaleDeformableAttentionFp16,self).__init__(init_cfg)\n\n        # import ipdb; ipdb.set_trace()\n        self.deformable_attention = build_attention(attn_cfg)\n        self.deformable_attention.init_weights()\n        self.fp16_enabled = False\n\n\n    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points','identity'))\n    def forward(self, query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                **kwargs):\n        # import ipdb; ipdb.set_trace()\n        return self.deformable_attention(query,\n                key=key,\n                value=value,\n                identity=identity,\n                query_pos=query_pos,\n                key_padding_mask=key_padding_mask,\n                reference_points=reference_points,\n                spatial_shapes=spatial_shapes,\n                level_start_index=level_start_index,**kwargs)\n\n\n\nclass MultiScaleDeformableAttnFunctionFp32(Function):\n\n    @staticmethod\n    @custom_fwd(cast_inputs=torch.float32)\n    def forward(ctx, value, value_spatial_shapes, value_level_start_index,\n                sampling_locations, attention_weights, im2col_step):\n        \"\"\"GPU version of multi-scale deformable attention.\n        Args:\n            value (Tensor): The value has shape\n                (bs, num_keys, mum_heads, embed_dims//num_heads)\n            value_spatial_shapes (Tensor): Spatial shape of\n                each feature map, has shape (num_levels, 2),\n                last dimension 2 represent (h, w)\n            sampling_locations (Tensor): The location of sampling points,\n                has shape\n                (bs ,num_queries, num_heads, num_levels, num_points, 2),\n                the last dimension 2 represent (x, y).\n            attention_weights (Tensor): The weight of sampling points used\n                when calculate the attention, has shape\n                (bs ,num_queries, num_heads, num_levels, num_points),\n            im2col_step (Tensor): The step 
used in image to column.\n        Returns:\n            Tensor: has shape (bs, num_queries, embed_dims)\n        \"\"\"\n\n        ctx.im2col_step = im2col_step\n        output = ext_module.ms_deform_attn_forward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            im2col_step=ctx.im2col_step)\n        ctx.save_for_backward(value, value_spatial_shapes,\n                              value_level_start_index, sampling_locations,\n                              attention_weights)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    @custom_bwd\n    def backward(ctx, grad_output):\n        \"\"\"GPU version of backward function.\n        Args:\n            grad_output (Tensor): Gradient\n                of output tensor of forward.\n        Returns:\n             Tuple[Tensor]: Gradient\n                of input tensors in forward.\n        \"\"\"\n        value, value_spatial_shapes, value_level_start_index,\\\n            sampling_locations, attention_weights = ctx.saved_tensors\n        grad_value = torch.zeros_like(value)\n        grad_sampling_loc = torch.zeros_like(sampling_locations)\n        grad_attn_weight = torch.zeros_like(attention_weights)\n\n        ext_module.ms_deform_attn_backward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            grad_output.contiguous(),\n            grad_value,\n            grad_sampling_loc,\n            grad_attn_weight,\n            im2col_step=ctx.im2col_step)\n\n        return grad_value, None, None, \\\n            grad_sampling_loc, grad_attn_weight, None\n\n\ndef multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,\n                                        sampling_locations, attention_weights):\n    \"\"\"CPU version of multi-scale deformable attention.\n    Args:\n        value (Tensor): The value has shape\n            (bs, num_keys, mum_heads, embed_dims//num_heads)\n        value_spatial_shapes (Tensor): Spatial shape of\n            each feature map, has shape (num_levels, 2),\n            last dimension 2 represent (h, w)\n        sampling_locations (Tensor): The location of sampling points,\n            has shape\n            (bs ,num_queries, num_heads, num_levels, num_points, 2),\n            the last dimension 2 represent (x, y).\n        attention_weights (Tensor): The weight of sampling points used\n            when calculate the attention, has shape\n            (bs ,num_queries, num_heads, num_levels, num_points),\n    Returns:\n        Tensor: has shape (bs, num_queries, embed_dims)\n    \"\"\"\n\n    bs, _, num_heads, embed_dims = value.shape\n    _, num_queries, num_heads, num_levels, num_points, _ =\\\n        sampling_locations.shape\n    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes],\n                             dim=1)\n    sampling_grids = 2 * sampling_locations - 1\n    sampling_value_list = []\n    for level, (H_, W_) in enumerate(value_spatial_shapes):\n        # bs, H_*W_, num_heads, embed_dims ->\n        # bs, H_*W_, num_heads*embed_dims ->\n        # bs, num_heads*embed_dims, H_*W_ ->\n        # bs*num_heads, embed_dims, H_, W_\n        value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(\n            bs * num_heads, embed_dims, H_, W_)\n        # bs, num_queries, num_heads, num_points, 2 ->\n        # bs, num_heads, 
num_queries, num_points, 2 ->\n        # bs*num_heads, num_queries, num_points, 2\n        sampling_grid_l_ = sampling_grids[:, :, :,\n                                          level].transpose(1, 2).flatten(0, 1)\n        # bs*num_heads, embed_dims, num_queries, num_points\n        sampling_value_l_ = F.grid_sample(\n            value_l_,\n            sampling_grid_l_,\n            mode='bilinear',\n            padding_mode='zeros',\n            align_corners=False)\n        sampling_value_list.append(sampling_value_l_)\n    # (bs, num_queries, num_heads, num_levels, num_points) ->\n    # (bs, num_heads, num_queries, num_levels, num_points) ->\n    # (bs, num_heads, 1, num_queries, num_levels*num_points)\n    attention_weights = attention_weights.transpose(1, 2).reshape(\n        bs * num_heads, 1, num_queries, num_levels * num_points)\n    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) *\n              attention_weights).sum(-1).view(bs, num_heads * embed_dims,\n                                              num_queries)\n    return output.transpose(1, 2).contiguous()\n\n\n@ATTENTION.register_module()\nclass MultiScaleDeformableAttentionFP32(BaseModule):\n    \"\"\"An attention module used in Deformable-Detr. `Deformable DETR:\n    Deformable Transformers for End-to-End Object Detection.\n      <https://arxiv.org/pdf/2010.04159.pdf>`_.\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 64.\n        num_levels (int): The number of feature map used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 4.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to False.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=4,\n                 im2col_step=64,\n                 dropout=0.1,\n                 batch_first=False,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.dropout = nn.Dropout(dropout)\n        self.batch_first = batch_first\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        self.sampling_offsets = nn.Linear(\n            embed_dims, num_heads * num_levels * num_points * 2)\n        self.attention_weights = nn.Linear(embed_dims,\n                                           num_heads * num_levels * num_points)\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        constant_init(self.sampling_offsets, 0.)\n        thetas = torch.arange(\n            self.num_heads,\n            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n        grid_init = (grid_init /\n                     grid_init.abs().max(-1, keepdim=True)[0]).view(\n                         self.num_heads, 1, 1,\n                         2).repeat(1, self.num_levels, self.num_points, 1)\n        for i in range(self.num_points):\n            grid_init[:, :, i, :] *= i + 1\n\n        self.sampling_offsets.bias.data = grid_init.view(-1)\n        constant_init(self.attention_weights, val=0., bias=0.)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    @deprecated_api_warning({'residual': 'identity'},\n                            cls_name='MultiScaleDeformableAttention')\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                
key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                **kwargs):\n        \"\"\"Forward Function of MultiScaleDeformAttention.\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (num_query, bs, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(num_key, bs, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(num_key, bs, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`. Default\n                None.\n            reference_points (Tensor):  The normalized reference\n                points with shape (bs, num_query, num_levels, 2),\n                all elements is range in [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n                or (N, Length_{query}, num_levels, 4), add\n                additional two dimensions is (w, h) to\n                form reference boxes.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n        Returns:\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            value = query\n\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n        if not self.batch_first:\n            # change to (bs, num_query ,embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n\n        bs, num_query, _ = query.shape\n        bs, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n\n        value = self.value_proj(value)\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n        value = value.view(bs, num_value, self.num_heads, -1)\n        sampling_offsets = self.sampling_offsets(query).view(\n            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)\n        attention_weights = self.attention_weights(query).view(\n            bs, num_query, self.num_heads, self.num_levels * self.num_points)\n        attention_weights = attention_weights.softmax(-1)\n\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   self.num_levels,\n                                                   self.num_points)\n        if reference_points.shape[-1] == 2:\n            offset_normalizer = torch.stack(\n                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)\n            sampling_locations = reference_points[:, :, None, :, 
None, :] \\\n                + sampling_offsets \\\n                / offset_normalizer[None, None, None, :, None, :]\n        elif reference_points.shape[-1] == 4:\n            sampling_locations = reference_points[:, :, None, :, None, :2] \\\n                + sampling_offsets / self.num_points \\\n                * reference_points[:, :, None, :, None, 2:] \\\n                * 0.5\n        else:\n            raise ValueError(\n                f'Last dim of reference_points must be'\n                f' 2 or 4, but get {reference_points.shape[-1]} instead.')\n        if torch.cuda.is_available() and value.is_cuda:\n            output = MultiScaleDeformableAttnFunctionFp32.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n            # the pure-PyTorch fallback only takes these four arguments\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n\n        output = self.output_proj(output)\n\n        if not self.batch_first:\n            # (num_query, bs ,embed_dims)\n            output = output.permute(1, 0, 2)\n\n        return self.dropout(output) + identity"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/hungarian_lines_assigner.py",
    "content": "import torch\n\nfrom mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.assigners import AssignResult\nfrom mmdet.core.bbox.assigners import BaseAssigner\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom scipy.optimize import linear_sum_assignment\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianLinesAssigner(BaseAssigner):\n    \"\"\"\n        Computes one-to-one matching between predictions and ground truth.\n        This class computes an assignment between the targets and the predictions\n        based on the costs. The costs are weighted sum of three components:\n        classification cost and regression L1 cost. The\n        targets don't include the no_object, so generally there are more\n        predictions than targets. After the one-to-one matching, the un-matched\n        are treated as backgrounds. Thus each query prediction will be assigned\n        with `0` or a positive integer indicating the ground truth index:\n        - 0: negative sample, no assigned gt\n        - positive integer: positive sample, index (1-based) of assigned gt\n        Args:\n            cls_weight (int | float, optional): The scale factor for classification\n                cost. Default 1.0.\n            bbox_weight (int | float, optional): The scale factor for regression\n                L1 cost. Default 1.0.\n    \"\"\"\n\n    def __init__(self,\n                 cost=dict(\n                     type='MapQueriesCost',\n                     cls_cost=dict(type='ClassificationCost', weight=1.),\n                     reg_cost=dict(type='LinesCost', weight=1.0),\n                    ),\n                 **kwargs):\n\n        self.cost = build_match_cost(cost)\n\n    def assign(self,\n               preds: dict,\n               gts: dict,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"\n            Computes one-to-one matching based on the weighted costs.\n            This method assign each query prediction to a ground truth or\n            background. The `assigned_gt_inds` with -1 means don't care,\n            0 means negative sample, and positive number is the index (1-based)\n            of assigned gt.\n            The assignment is done in the following steps, the order matters.\n            1. assign every prediction to -1\n            2. compute the weighted costs\n            3. do Hungarian matching on CPU based on the costs\n            4. assign all to 0 (background) first, then for each matched pair\n            between predictions and gts, treat this prediction as foreground\n            and assign the corresponding gt index (plus 1) to it.\n            Args:\n                lines_pred (Tensor): predicted normalized lines:\n                    [num_query, num_points, 2]\n                cls_pred (Tensor): Predicted classification logits, shape\n                    [num_query, num_class].\n\n                lines_gt (Tensor): Ground truth lines\n                    [num_gt, num_points, 2].\n                labels_gt (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n                gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                    labelled as `ignored`. Default None.\n                eps (int | float, optional): A value added to the denominator for\n                    numerical stability. 
Default 1e-7.\n            Returns:\n                :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        \n        num_gts, num_lines = gts['lines'].size(0), preds['lines'].size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = \\\n            preds['lines'].new_full((num_lines,), -1, dtype=torch.long)\n        assigned_labels = \\\n            preds['lines'].new_full((num_lines,), -1, dtype=torch.long)\n\n        if num_gts == 0 or num_lines == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels), None\n\n        # 2. compute the weighted costs\n        gt_permute_idx = None # (num_preds, num_gts)\n        if self.cost.reg_cost.permute:\n            cost, gt_permute_idx = self.cost(preds, gts)\n        else:\n            cost = self.cost(preds, gts)\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu().numpy()\n        try:\n            matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        except ValueError as err:\n            # linear_sum_assignment raises ValueError on non-finite costs;\n            # surface the cost range instead of dropping into a debugger\n            raise ValueError(\n                'linear_sum_assignment failed, cost max {}, min {}'.format(\n                    cost.max(), cost.min())) from err\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            preds['lines'].device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            preds['lines'].device)\n\n        # 4. assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gts['labels'][matched_col_inds]\n        return AssignResult(\n            num_gts, assigned_gt_inds, None, labels=assigned_labels), gt_permute_idx"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/loss.py",
    "content": "import torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nfrom mmdet.models.losses import l1_loss, smooth_l1_loss\nfrom mmdet.models.losses.utils import weighted_loss\nimport mmcv\n\nfrom mmdet.models.builder import LOSSES\n\n\n@LOSSES.register_module()\nclass LinesL1Loss(nn.Module):\n\n    def __init__(self, reduction='mean', loss_weight=1.0, beta=0.5):\n        \"\"\"\n            L1 loss on line points. Uses the smooth L1 loss when beta > 0,\n            otherwise the plain L1 loss; the result is normalized by the\n            number of points per line.\n            Args:\n                reduction (str, optional): The method to reduce the loss.\n                    Options are \"none\", \"mean\" and \"sum\".\n                loss_weight (float, optional): The weight of loss.\n                beta (float, optional): Threshold of the smooth L1 loss.\n                    If 0, the plain L1 loss is used. Default 0.5.\n        \"\"\"\n\n        super().__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.beta = beta\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            pred (torch.Tensor): The prediction.\n                shape: [bs, ...]\n            target (torch.Tensor): The learning target of the prediction.\n                shape: [bs, ...]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n                It's useful when the predictions are not all valid.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        if self.beta > 0:\n            loss = smooth_l1_loss(\n                pred, target, weight, reduction=reduction, avg_factor=avg_factor, beta=self.beta)\n        else:\n            loss = l1_loss(\n                pred, target, weight, reduction=reduction, avg_factor=avg_factor)\n\n        num_points = pred.shape[-1] // 2\n        loss = loss / num_points\n\n        return loss * self.loss_weight\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef bce(pred, label, class_weight=None):\n    \"\"\"\n        Binary cross-entropy with logits.\n        pred: B,nquery,npts\n        label: B,nquery,npts\n    \"\"\"\n\n    if label.numel() == 0:\n        return pred.sum() * 0\n    assert pred.size() == label.size()\n\n    loss = F.binary_cross_entropy_with_logits(\n        pred, label.float(), pos_weight=class_weight, reduction='none')\n\n    return loss\n\n\n@LOSSES.register_module()\nclass MasksLoss(nn.Module):\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(MasksLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            pred (torch.Tensor): Predicted mask logits,\n                shape [bs, num_query, num_points].\n            target (torch.Tensor): Target masks,\n                shape [bs, num_query, num_points].\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss = bce(pred, target, weight, reduction=reduction,\n                   avg_factor=avg_factor)\n\n        return loss * self.loss_weight\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef ce(pred, label, class_weight=None):\n    \"\"\"\n        Cross-entropy loss.\n        pred: B*nquery,npts\n        label: B*nquery,\n    \"\"\"\n\n    if label.numel() == 0:\n        return pred.sum() * 0\n\n    loss = F.cross_entropy(\n        pred, label, weight=class_weight, reduction='none')\n\n    return loss\n\n\n@LOSSES.register_module()\nclass LenLoss(nn.Module):\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(LenLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            pred (torch.Tensor): Predicted logits,\n                shape [bs*num_query, num_points].\n            target (torch.Tensor): Target class indices,\n                shape [bs*num_query, ].\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss = ce(pred, target, weight, reduction=reduction,\n                  avg_factor=avg_factor)\n\n        return loss * self.loss_weight"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/map_utils.py",
    "content": "from mmdet.core.bbox.transforms import bbox_xyxy_to_cxcywh, bbox_cxcywh_to_xyxy\n\ndef normalize_2d_bbox(bboxes, pc_range):\n\n    patch_h = pc_range[4]-pc_range[1]\n    patch_w = pc_range[3]-pc_range[0]\n    cxcywh_bboxes = bbox_xyxy_to_cxcywh(bboxes)\n    cxcywh_bboxes[...,0:1] = cxcywh_bboxes[..., 0:1] - pc_range[0]\n    cxcywh_bboxes[...,1:2] = cxcywh_bboxes[...,1:2] - pc_range[1]\n    factor = bboxes.new_tensor([patch_w, patch_h,patch_w,patch_h])\n\n    normalized_bboxes = cxcywh_bboxes / factor\n    return normalized_bboxes\n\ndef normalize_2d_pts(pts, pc_range):\n    patch_h = pc_range[4]-pc_range[1]\n    patch_w = pc_range[3]-pc_range[0]\n    new_pts = pts.clone()\n    new_pts[...,0:1] = pts[..., 0:1] - pc_range[0]\n    new_pts[...,1:2] = pts[...,1:2] - pc_range[1]\n    factor = pts.new_tensor([patch_w, patch_h])\n    normalized_pts = new_pts / factor\n    return normalized_pts\n\ndef denormalize_2d_bbox(bboxes, pc_range):\n\n    bboxes = bbox_cxcywh_to_xyxy(bboxes)\n    bboxes[..., 0::2] = (bboxes[..., 0::2]*(pc_range[3] -\n                            pc_range[0]) + pc_range[0])\n    bboxes[..., 1::2] = (bboxes[..., 1::2]*(pc_range[4] -\n                            pc_range[1]) + pc_range[1])\n\n    return bboxes\n\ndef denormalize_2d_pts(pts, pc_range):\n    new_pts = pts.clone()\n    new_pts[...,0:1] = (pts[..., 0:1]*(pc_range[3] -\n                            pc_range[0]) + pc_range[0])\n    new_pts[...,1:2] = (pts[...,1:2]*(pc_range[4] -\n                            pc_range[1]) + pc_range[1])\n    return new_pts"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/streammapnet_head.py",
    "content": "import copy\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport mmcv\nfrom mmcv.cnn import Conv2d, Linear, build_activation_layer, bias_init_with_prob, xavier_init\nfrom mmcv.runner import force_fp32\nfrom mmcv.cnn.bricks.transformer import build_positional_encoding\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.core import multi_apply, reduce_mean, build_assigner, build_sampler\nfrom mmdet.models import HEADS\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom .utils import StreamTensorMemory\nfrom .utils import MotionMLP\n\n@HEADS.register_module(force=True)\nclass MapDetectorHead(nn.Module):\n\n    def __init__(self, \n                 num_queries,\n                 num_classes=3,\n                 in_channels=256,\n                 embed_dims=256,\n                 score_thr=0.1,\n                 num_points=20,\n                 coord_dim=2,\n                 roi_size=(60, 30),\n                 different_heads=True,\n                 predict_refine=False,\n                 bev_pos=None,\n                 sync_cls_avg_factor=True,\n                 bg_cls_weight=0.,\n                 streaming_cfg=dict(),\n                 transformer=dict(),\n                 loss_cls=dict(),\n                 loss_reg=dict(),\n                 assigner=dict(),\n                 map_layer_index=-1,\n                 **kwargs,\n                ):\n        super().__init__()\n        self.num_queries = num_queries\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.embed_dims = embed_dims\n        self.different_heads = different_heads\n        self.predict_refine = predict_refine\n        self.bev_pos = bev_pos\n        self.num_points = num_points\n        self.coord_dim = coord_dim\n        \n        self.sync_cls_avg_factor = sync_cls_avg_factor\n        self.bg_cls_weight = bg_cls_weight\n        self.map_layer_index = map_layer_index\n        if streaming_cfg:\n            self.streaming_query = streaming_cfg['streaming']\n        else:\n            self.streaming_query = False\n        if self.streaming_query:\n            self.batch_size = streaming_cfg['batch_size']\n            self.topk_query = streaming_cfg['topk']\n            self.trans_loss_weight = streaming_cfg.get('trans_loss_weight', 0.0)\n            self.query_memory = StreamTensorMemory(\n                self.batch_size,\n            )\n            self.reference_points_memory = StreamTensorMemory(\n                self.batch_size,\n            )\n            c_dim = 12\n\n            self.query_update = MotionMLP(c_dim=c_dim, f_dim=self.embed_dims, identity=True)\n            self.target_memory = StreamTensorMemory(self.batch_size)\n            \n        self.register_buffer('roi_size', torch.tensor(roi_size, dtype=torch.float32))\n        origin = (-roi_size[0]/2, -roi_size[1]/2)\n        self.register_buffer('origin', torch.tensor(origin, dtype=torch.float32))\n\n        sampler_cfg = dict(type='PseudoSampler')\n        self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.transformer = build_transformer(transformer)\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_reg = build_loss(loss_reg)\n        self.assigner = build_assigner(assigner)\n\n        if self.loss_cls.use_sigmoid:\n            self.cls_out_channels = num_classes\n        else:\n            
self.cls_out_channels = num_classes + 1\n        \n        self._init_embedding()\n        self._init_branch()\n        self.init_weights()\n\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the DeformDETR head.\"\"\"\n\n        for p in self.input_proj.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        \n        xavier_init(self.reference_points_embed, distribution='uniform', bias=0.)\n\n        self.transformer.init_weights()\n\n        # init prediction branch\n        for m in self.reg_branches:\n            for param in m.parameters():\n                if param.dim() > 1:\n                    nn.init.xavier_uniform_(param)\n\n        # focal loss init\n        if self.loss_cls.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n            if isinstance(self.cls_branches, nn.ModuleList):\n                for m in self.cls_branches:\n                    if hasattr(m, 'bias'):\n                        nn.init.constant_(m.bias, bias_init)\n            else:\n                m = self.cls_branches\n                nn.init.constant_(m.bias, bias_init)\n        \n        if self.streaming_query:\n            if isinstance(self.query_update, MotionMLP):\n                self.query_update.init_weights()\n            if hasattr(self, 'query_alpha'):\n                for m in self.query_alpha:\n                    for param in m.parameters():\n                        if param.dim() > 1:\n                            nn.init.zeros_(param)\n\n    def _init_embedding(self):\n        positional_encoding = dict(\n            type='SinePositionalEncoding',\n            num_feats=self.embed_dims//2,\n            normalize=True\n        )\n        self.bev_pos_embed = build_positional_encoding(positional_encoding)\n\n        # query_pos_embed & query_embed\n        self.query_embedding = nn.Embedding(self.num_queries,\n                                            self.embed_dims)\n\n        self.reference_points_embed = nn.Linear(self.embed_dims, self.num_points * 2)\n        \n    def _init_branch(self,):\n        \"\"\"Initialize classification branch and regression branch of head.\"\"\"\n        self.input_proj = Conv2d(\n            self.in_channels, self.embed_dims, kernel_size=1)\n\n        cls_branch = Linear(self.embed_dims, self.cls_out_channels)\n\n        reg_branch = [\n            Linear(self.embed_dims, 2*self.embed_dims),\n            nn.LayerNorm(2*self.embed_dims),\n            nn.ReLU(),\n            Linear(2*self.embed_dims, 2*self.embed_dims),\n            nn.LayerNorm(2*self.embed_dims),\n            nn.ReLU(),\n            Linear(2*self.embed_dims, self.num_points * self.coord_dim),\n        ]\n        reg_branch = nn.Sequential(*reg_branch)\n\n        num_layers = self.transformer.decoder.num_layers\n        if self.different_heads:\n            cls_branches = nn.ModuleList(\n                [copy.deepcopy(cls_branch) for _ in range(num_layers)])\n            reg_branches = nn.ModuleList(\n                [copy.deepcopy(reg_branch) for _ in range(num_layers)])\n        else:\n            cls_branches = nn.ModuleList(\n                [cls_branch for _ in range(num_layers)])\n            reg_branches = nn.ModuleList(\n                [reg_branch for _ in range(num_layers)])\n\n        self.reg_branches = reg_branches\n        self.cls_branches = cls_branches\n\n    def _prepare_context(self, bev_features):\n        \"\"\"Prepare class label and vertex context.\"\"\"\n        device = bev_features.device\n\n  
      # Add 2D coordinate grid embedding\n        B, C, H, W = bev_features.shape\n        bev_mask = bev_features.new_zeros(B, H, W)\n        bev_pos_embeddings = self.bev_pos_embed(bev_mask) # (bs, embed_dims, H, W)\n\n        bev_features = self.input_proj(bev_features) + bev_pos_embeddings # (bs, embed_dims, H, W)\n    \n        assert list(bev_features.shape) == [B, self.embed_dims, H, W]\n        return bev_features\n\n    def propagate(self, query_embedding, img_metas, start_of_sequence, ego_pose_inv, return_loss=True):\n        bs = query_embedding.shape[0]\n        propagated_query_list = []\n        prop_reference_points_list = []\n        \n        tmp = self.query_memory.get(img_metas)\n        query_memory, pose_memory = tmp['tensor'], tmp['img_metas']\n\n        tmp = self.reference_points_memory.get(img_metas)\n        ref_pts_memory, pose_memory = tmp['tensor'], tmp['img_metas']\n\n        if return_loss:\n            target_memory = self.target_memory.get(img_metas)['tensor']\n            trans_loss = query_embedding.new_zeros((1,))\n            num_pos = 0\n\n        is_first_frame_list = start_of_sequence\n\n        for i in range(bs):\n            is_first_frame = is_first_frame_list[i]\n            if is_first_frame:\n                padding = query_embedding.new_zeros((self.topk_query, self.embed_dims))\n                if return_loss:\n                    trans_loss += self.query_update(\n                         padding, # (topk, embed_dims)\n                        padding.new_zeros((self.topk_query, 12))\n                    ).sum() * 0\n                propagated_query_list.append(padding)\n                padding = query_embedding.new_zeros((self.topk_query, self.num_points, 2))\n                prop_reference_points_list.append(padding)\n            else:\n                curr_to_prev_ego_rt = query_embedding.new_tensor(img_metas[i]['curr_to_prev_ego_rt'], dtype=torch.float64).to(query_embedding.device)\n                pos_encoding = curr_to_prev_ego_rt.float()[:3].view(-1)\n                prop_q = query_memory[i]\n                # query_memory_updated = prop_q\n                query_memory_updated = self.query_update(\n                    prop_q, # (topk, embed_dims)\n                    pos_encoding.view(1, -1).repeat(len(query_memory[i]), 1) * 0,\n                )\n                propagated_query_list.append(query_memory_updated.clone())\n\n                pred = self.reg_branches[-1](query_memory_updated).sigmoid() # (num_prop, 2*num_pts)\n                assert list(pred.shape) == [self.topk_query, 2*self.num_points]\n\n                if return_loss:\n                    targets = target_memory[i]\n                    weights = targets.new_ones((self.topk_query, 2*self.num_points))\n                    bg_idx = torch.all(targets.view(self.topk_query, -1) == -1e5, dim=1)\n                    num_pos = num_pos + (self.topk_query - bg_idx.sum())\n                    weights[bg_idx, :] = 0.0\n\n                    # global -> ego\n                    curr_targets = torch.einsum('lk,ijk->ijl', ego_pose_inv[i].float(), targets)\n                    normed_targets = (curr_targets[..., :2] - self.origin) / self.roi_size # (num_prop, num_pts, 2)\n                    normed_targets = torch.clip(normed_targets, min=0., max=1.).reshape(-1, 2*self.num_points)\n                    # (num_prop, 2*num_pts)\n                    trans_loss += self.loss_reg(pred, normed_targets, weights, avg_factor=1.0)\n                    # trans_loss = None\n                # ref 
pts\n                prev_ref_pts = ref_pts_memory[i]\n                curr_ref_pts = torch.einsum('lk,ijk->ijl', ego_pose_inv[i].double(), prev_ref_pts.double()).float()\n                normed_ref_pts = (curr_ref_pts[..., :2] - self.origin) / self.roi_size # (num_prop, num_pts, 2)\n                # self.visual_sample(normed_ref_pts, img_metas[i]['index'], prev=True)\n                normed_ref_pts = torch.clip(normed_ref_pts, min=0., max=1.)\n\n                prop_reference_points_list.append(normed_ref_pts)\n                \n        prop_query_embedding = torch.stack(propagated_query_list) # (bs, topk, embed_dims)\n        prop_ref_pts = torch.stack(prop_reference_points_list) # (bs, topk, num_pts, 2)\n        assert list(prop_query_embedding.shape) == [bs, self.topk_query, self.embed_dims]\n        assert list(prop_ref_pts.shape) == [bs, self.topk_query, self.num_points, 2]\n        \n        init_reference_points = self.reference_points_embed(query_embedding).sigmoid() # (bs, num_q, 2*num_pts)\n        init_reference_points = init_reference_points.view(bs, self.num_queries, self.num_points, 2) # (bs, num_q, num_pts, 2)\n        memory_query_embedding = None\n\n        if return_loss:\n            trans_loss = self.trans_loss_weight * trans_loss / (num_pos + 1e-10)\n            return query_embedding, prop_query_embedding, init_reference_points, prop_ref_pts, memory_query_embedding, is_first_frame_list, trans_loss\n        else:\n            return query_embedding, prop_query_embedding, init_reference_points, prop_ref_pts, memory_query_embedding, is_first_frame_list\n\n    def forward_train(self, input_dict, img_metas, map_gt_bboxes_3d, map_gt_labels_3d):\n        '''\n        Args:\n            bev_feature (List[Tensor]): shape [B, C, H, W]\n                feature in bev view\n        Outs:\n            preds_dict (list[dict]):\n                lines (Tensor): Classification score of all\n                    decoder layers, has shape\n                    [bs, num_query, 2*num_points]\n                scores (Tensor):\n                    [bs, num_query,]\n        '''\n\n        if input_dict['img_bev_feat'][0].dim() == 5:\n            bev_features = [level.mean(-1) for level in input_dict['img_bev_feat']][0]\n        else:\n            bev_features = input_dict['img_bev_feat'][0]\n\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        bev_features = self._prepare_context(bev_features)\n\n        bs, C, H, W = bev_features.shape\n        img_masks = bev_features.new_zeros((bs, H, W))\n        # pos_embed = self.positional_encoding(img_masks)\n        pos_embed = None\n\n        query_embedding = self.query_embedding.weight[None, ...].repeat(bs, 1, 1) # [B, num_q, embed_dims]\n        input_query_num = self.num_queries\n        # num query: self.num_query + self.topk\n        if self.streaming_query:\n            query_embedding, prop_query_embedding, init_reference_points, prop_ref_pts, memory_query, is_first_frame_list, trans_loss = \\\n                self.propagate(query_embedding, 
img_metas, start_of_sequence, ego_pose_inv, return_loss=True)\n        else:\n            init_reference_points = self.reference_points_embed(query_embedding).sigmoid() # (bs, num_q, 2*num_pts)\n            init_reference_points = init_reference_points.view(-1, self.num_queries, self.num_points, 2) # (bs, num_q, num_pts, 2)\n            prop_query_embedding = None\n            prop_ref_pts = None\n            is_first_frame_list = [True for i in range(bs)]\n        \n        assert list(init_reference_points.shape) == [bs, self.num_queries, self.num_points, 2]\n        assert list(query_embedding.shape) == [bs, self.num_queries, self.embed_dims]\n\n        # outs_dec: (num_layers, num_qs, bs, embed_dims)\n        inter_queries, init_reference, inter_references = self.transformer(\n            mlvl_feats=[bev_features,],\n            mlvl_masks=[img_masks.type(torch.bool)],\n            query_embed=query_embedding,\n            prop_query=prop_query_embedding,\n            mlvl_pos_embeds=[pos_embed], # not used\n            memory_query=None,\n            init_reference_points=init_reference_points,\n            prop_reference_points=prop_ref_pts,\n            reg_branches=self.reg_branches,\n            cls_branches=self.cls_branches,\n            predict_refine=self.predict_refine,\n            is_first_frame_list=is_first_frame_list,\n            query_key_padding_mask=query_embedding.new_zeros((bs, self.num_queries), dtype=torch.bool), # mask used in self-attn,\n        )\n        outputs = []\n        for i, (queries) in enumerate(inter_queries):\n            reg_points = inter_references[i] # (bs, num_q, num_points, 2)\n            bs = reg_points.shape[0]\n            reg_points = reg_points.view(bs, -1, 2*self.num_points) # (bs, num_q, 2*num_points)\n\n            scores = self.cls_branches[i](queries) # (bs, num_q, num_classes)\n\n            reg_points_list = []\n            scores_list = []\n            for j in range(len(scores)):\n                # padding queries should not be output\n                reg_points_list.append(reg_points[j])\n                scores_list.append(scores[j])\n\n            pred_dict = {\n                'lines': torch.stack(reg_points_list),\n                'scores': torch.stack(scores_list),\n                'queries': queries,\n            }\n            # if i == len(inter_queries)-1:\n                \n            #     map_queries = queries\n            #     map_lines = map_lines\n            #     map_scores = map_scores\n\n            outputs.append(pred_dict)\n\n        loss_dict, det_match_idxs, det_match_gt_idxs, gt_lines_list = self.loss(map_gt_bboxes_3d, map_gt_labels_3d, outputs, img_metas)\n\n        if self.streaming_query:\n            query_list = []\n            ref_pts_list = []\n            gt_targets_list = []\n            lines, scores = outputs[self.map_layer_index]['lines'], outputs[self.map_layer_index]['scores']\n            gt_lines = gt_lines_list[ self.map_layer_index] # take results from the last layer\n\n            for i in range(bs):\n                _lines = lines[i]\n                _queries = inter_queries[self.map_layer_index][i]\n                _scores = scores[i]\n                _gt_targets = gt_lines[i] # (num_q or num_q+topk, 20, 2)\n                assert len(_lines) == len(_queries)\n                assert len(_lines) == len(_gt_targets)\n\n                _scores, _ = _scores.max(-1)\n                topk_score, topk_idx = _scores.topk(k=self.topk_query, dim=-1)\n\n                _queries = 
_queries[topk_idx] # (topk, embed_dims)\n                _lines = _lines[topk_idx] # (topk, 2*num_pts)\n                _gt_targets = _gt_targets[topk_idx] # (topk, 20, 2)\n                query_list.append(_queries)\n\n                _lines = _lines.view(-1, self.num_points, 2)\n                _lines = _lines * self.roi_size + self.origin\n                _lines = torch.cat([_lines, torch.zeros_like(_lines[..., 0:1]), torch.ones_like(_lines[..., 0:1])], dim=-1)\n                _lines = (ego_pose[i] @ _lines.unsqueeze(-1)).squeeze(-1)\n                ref_pts_list.append(_lines)\n\n                _gt_targets = _gt_targets.view(-1, self.num_points, 2)\n                mask = _gt_targets == 0.0\n                _gt_targets =_gt_targets * self.roi_size + self.origin\n                _gt_targets = torch.cat([_gt_targets, torch.zeros_like(_lines[..., 0:1]), torch.ones_like(_lines[..., 0:1])], dim=-1)\n                _gt_targets = (ego_pose[i] @ _gt_targets.unsqueeze(-1)).squeeze(-1)\n                _gt_targets[mask.repeat(1, 1, 2)] = -1e5\n                gt_targets_list.append(_gt_targets)\n\n\n            self.query_memory.update(query_list, img_metas)\n            self.reference_points_memory.update(ref_pts_list, img_metas)\n            self.target_memory.update(gt_targets_list, img_metas)\n\n            loss_dict['trans_loss'] = trans_loss\n        return loss_dict, outputs\n        # return outputs, loss_dict, det_match_idxs, det_match_gt_idxs\n    \n    def forward_test(self, input_dict, img_metas, map_gt_bboxes_3d=None, map_gt_labels_3d=None):\n        '''\n        Args:\n            bev_feature (List[Tensor]): shape [B, C, H, W]\n                feature in bev view\n        Outs:\n            preds_dict (list[dict]):\n                lines (Tensor): Classification score of all\n                    decoder layers, has shape\n                    [bs, num_query, 2*num_points]\n                scores (Tensor):\n                    [bs, num_query,]\n        '''\n\n        if input_dict['img_bev_feat'][0].dim() == 5:\n            bev_features = [level.mean(-1) for level in input_dict['img_bev_feat']][0]\n        else:\n            bev_features = input_dict['img_bev_feat'][0]\n\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n\n        bev_features = self._prepare_context(bev_features)\n\n        bs, C, H, W = bev_features.shape\n        img_masks = bev_features.new_zeros((bs, H, W))\n        # pos_embed = self.positional_encoding(img_masks)\n        pos_embed = None\n\n        query_embedding = self.query_embedding.weight[None, ...].repeat(bs, 1, 1) # [B, num_q, embed_dims]\n        input_query_num = self.num_queries\n        # num query: self.num_query + self.topk\n        if self.streaming_query:\n            query_embedding, prop_query_embedding, init_reference_points, prop_ref_pts, memory_query, is_first_frame_list = \\\n                self.propagate(query_embedding, img_metas, start_of_sequence, ego_pose_inv, return_loss=False)\n            \n        else:\n            init_reference_points = 
self.reference_points_embed(query_embedding).sigmoid() # (bs, num_q, 2*num_pts)\n            init_reference_points = init_reference_points.view(-1, self.num_queries, self.num_points, 2) # (bs, num_q, num_pts, 2)\n            prop_query_embedding = None\n            prop_ref_pts = None\n            is_first_frame_list = [True for i in range(bs)]\n        \n        assert list(init_reference_points.shape) == [bs, input_query_num, self.num_points, 2]\n        assert list(query_embedding.shape) == [bs, input_query_num, self.embed_dims]\n\n        # outs_dec: (num_layers, num_qs, bs, embed_dims)\n        inter_queries, init_reference, inter_references = self.transformer(\n            mlvl_feats=[bev_features,],\n            mlvl_masks=[img_masks.type(torch.bool)],\n            query_embed=query_embedding,\n            prop_query=prop_query_embedding,\n            mlvl_pos_embeds=[pos_embed], # not used\n            memory_query=None,\n            init_reference_points=init_reference_points,\n            prop_reference_points=prop_ref_pts,\n            reg_branches=self.reg_branches,\n            cls_branches=self.cls_branches,\n            predict_refine=self.predict_refine,\n            is_first_frame_list=is_first_frame_list,\n            query_key_padding_mask=query_embedding.new_zeros((bs, self.num_queries), dtype=torch.bool), # mask used in self-attn,\n        )\n\n        outputs = []\n        for i, (queries) in enumerate(inter_queries):\n            reg_points = inter_references[i] # (bs, num_q, num_points, 2)\n            bs = reg_points.shape[0]\n            reg_points = reg_points.view(bs, -1, 2*self.num_points) # (bs, num_q, 2*num_points)\n            scores = self.cls_branches[i](queries) # (bs, num_q, num_classes)\n\n            reg_points_list = []\n            scores_list = []\n            prop_mask_list = []\n            for i in range(len(scores)):\n                # padding queries should not be output\n                reg_points_list.append(reg_points[i])\n                scores_list.append(scores[i])\n                prop_mask = scores.new_ones((len(scores[i]), ), dtype=torch.bool)\n                prop_mask[-self.num_queries:] = False\n                prop_mask_list.append(prop_mask)\n\n            pred_dict = {\n                'lines': torch.stack(reg_points_list),\n                'scores': torch.stack(scores_list),\n                'prop_mask': torch.stack(prop_mask_list),\n                'queries': queries\n            }\n            outputs.append(pred_dict)\n        \n        if self.streaming_query:\n            query_list = []\n            ref_pts_list = []\n            lines, scores = outputs[self.map_layer_index]['lines'], outputs[ self.map_layer_index]['scores']\n            for i in range(bs):\n                _lines = lines[i]\n                _queries = inter_queries[ self.map_layer_index][i]\n                _scores = scores[i]\n                assert len(_lines) == len(_queries)\n                _scores, _ = _scores.max(-1)\n                topk_score, topk_idx = _scores.topk(k=self.topk_query, dim=-1)\n\n                _queries = _queries[topk_idx] # (topk, embed_dims)\n                _lines = _lines[topk_idx] # (topk, 2*num_pts)\n                \n                query_list.append(_queries)\n               \n                _lines = _lines.view(-1, self.num_points, 2)\n                # self.visual_sample(_lines, img_metas[i]['index'], pre=False)\n                _lines = _lines * self.roi_size + self.origin\n                _lines = 
torch.cat([_lines, torch.zeros_like(_lines[..., 0:1]), torch.ones_like(_lines[..., 0:1])], dim=-1)\n                _lines = (ego_pose[i] @ _lines.unsqueeze(-1)).squeeze(-1)\n                ref_pts_list.append(_lines)\n\n\n            self.query_memory.update(query_list, img_metas)\n            self.reference_points_memory.update(ref_pts_list, img_metas)\n        gt_lane = map_gt_bboxes_3d[0][0].fixed_num_sampled_points.to(ego_pose[0].device)\n        gt_lane = torch.cat([gt_lane, torch.zeros_like(gt_lane[..., 0:1]), torch.ones_like(gt_lane[..., 0:1])], dim=-1)\n        gt_lane = (ego_pose[0] @ gt_lane.unsqueeze(-1)).squeeze(-1)[..., :2]\n\n        gt_lane_label = map_gt_labels_3d[0][0]\n        outputs[-1]['gt_lane_in_global'] = gt_lane\n        outputs[-1]['gt_lane_label'] = gt_lane_label\n\n        return outputs \n\n    def world2bev_vis(self, x, y):\n             return int(x * 640), int(y*320)\n\n    def visual_sample(self, lines, index, pre=False, **kwargs):\n\n        import cv2\n\n        bev_img = np.ones([640, 640, 3], dtype=np.float32) * 255\n        bev_img = bev_img.astype(np.float32)\n\n        bev_img = cv2.circle(bev_img, self.world2bev_vis(0.5, 1.5), 5, (0, 255, 0), thickness=-1)\n        \n        for k, line in enumerate(lines):\n                label = 0\n                line = line.cpu().numpy()\n                corners = np.array([self.world2bev_vis(*corner) for corner in line])\n                corners = [each for each in corners if ((each>=0).all() & (each<1500).all())]\n                corners = [(x, y+320) for (x, y) in corners ]\n                colors = [(255, 255, 0), (255, 0, 0), (0, 255, 0)]\n                for i, corner in enumerate(corners[:-1]):\n                    bev_img = cv2.circle(bev_img, corners[i], 2, (61, 102, 255))\n                    bev_img = cv2.line(bev_img, corners[i], corners[i+1], color=colors[label], thickness=1)\n        if pre:\n            mmcv.imwrite(bev_img, f'pred_bev_{index}_prev.png')\n        else:\n            mmcv.imwrite(bev_img, f'pred_bev_{index}_after.png')\n\n\n\n    @force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))\n    def _get_target_single(self,\n                           score_pred,\n                           lines_pred,\n                           gt_labels,\n                           gt_lines,\n                           valid_map,\n                           gt_bboxes_ignore=None):\n        \"\"\"\n            Compute regression and classification targets for one image.\n            Outputs from a single decoder layer of a single feature level are used.\n            Args:\n                score_pred (Tensor): Box score logits from a single decoder layer\n                    for one image. 
Shape [num_query, cls_out_channels].\n                lines_pred (Tensor):\n                    shape [num_query, 2*num_points]\n                gt_labels (torch.LongTensor)\n                    shape [num_gt, ]\n                gt_lines (Tensor):\n                    shape [num_gt, 2*num_points].\n                \n            Returns:\n                tuple[Tensor]: a tuple containing the following for one sample.\n                    - labels (LongTensor): Labels of each image.\n                        shape [num_query, 1]\n                    - label_weights (Tensor]): Label weights of each image.\n                        shape [num_query, 1]\n                    - lines_target (Tensor): Lines targets of each image.\n                        shape [num_query, num_points, 2]\n                    - lines_weights (Tensor): Lines weights of each image.\n                        shape [num_query, num_points, 2]\n                    - pos_inds (Tensor): Sampled positive indices for each image.\n                    - neg_inds (Tensor): Sampled negative indices for each image.\n        \"\"\"\n        num_pred_lines = len(lines_pred)\n        # assigner and sampler\n        assign_result, gt_permute_idx = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),\n                                             gts=dict(lines=gt_lines,\n                                                      labels=gt_labels, ),\n                                             gt_bboxes_ignore=gt_bboxes_ignore)\n        if gt_lines.dim() == 4:\n            gt_lines = gt_lines.flatten(-2, -1)\n        sampling_result = self.sampler.sample(\n            assign_result, lines_pred, gt_lines)\n        num_gt = len(gt_lines)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        pos_gt_inds = sampling_result.pos_assigned_gt_inds\n\n        labels = gt_lines.new_full(\n                (num_pred_lines, ), self.num_classes, dtype=torch.long) # (num_q, )\n        if valid_map:\n            labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        label_weights = gt_lines.new_ones(num_pred_lines) # (num_q, )\n\n        lines_target = torch.zeros_like(lines_pred) # (num_q, 2*num_pts)\n        lines_weights = torch.zeros_like(lines_pred) # (num_q, 2*num_pts)\n        \n        if num_gt > 0 and valid_map:\n            if gt_permute_idx is not None: # using permute invariant label\n                # gt_permute_idx: (num_q, num_gt)\n                # pos_inds: which query is positive\n                # pos_gt_inds: which gt each pos pred is assigned\n                # single_matched_gt_permute_idx: which permute order is matched\n                single_matched_gt_permute_idx = gt_permute_idx[\n                    pos_inds, pos_gt_inds\n                ]\n                lines_target[pos_inds] = gt_lines[pos_gt_inds, single_matched_gt_permute_idx].type(\n                    lines_target.dtype) # (num_q, 2*num_pts)\n            else:\n                lines_target[pos_inds] = sampling_result.pos_gt_bboxes.type(\n                    lines_target.dtype) # (num_q, 2*num_pts)\n        \n        lines_weights[pos_inds] = 1.0 # (num_q, 2*num_pts)\n\n        # normalization\n        # n = lines_weights.sum(-1, keepdim=True) # (num_q, 1)\n        # lines_weights = lines_weights / n.masked_fill(n == 0, 1) # (num_q, 2*num_pts)\n        # [0, ..., 0] for neg ind and [1/npts, ..., 1/npts] for pos ind\n\n        return (labels, label_weights, lines_target, lines_weights,\n      
          pos_inds, neg_inds, pos_gt_inds)\n\n    # @force_fp32(apply_to=('preds', 'gts'))\n    def get_targets(self, preds, map_gt_bboxes_3d,\n             map_gt_labels_3d, valid_map, gt_bboxes_ignore_list=None):\n        \"\"\"\n            Compute regression and classification targets for a batch image.\n            Outputs from a single decoder layer of a single feature level are used.\n            Args:\n                preds (dict): \n                    - lines (Tensor): shape (bs, num_queries, 2*num_points)\n                    - scores (Tensor): shape (bs, num_queries, num_class_channels)\n                gts (dict):\n                    - class_label (list[Tensor]): tensor shape (num_gts, )\n                    - lines (list[Tensor]): tensor shape (num_gts, 2*num_points)\n                gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                    boxes which can be ignored for each image. Default None.\n            Returns:\n                tuple: a tuple containing the following targets.\n                    - labels_list (list[Tensor]): Labels for all images.\n                    - label_weights_list (list[Tensor]): Label weights for all \\\n                        images.\n                    - lines_targets_list (list[Tensor]): Lines targets for all \\\n                        images.\n                    - lines_weight_list (list[Tensor]): Lines weights for all \\\n                        images.\n                    - num_total_pos (int): Number of positive samples in all \\\n                        images.\n                    - num_total_neg (int): Number of negative samples in all \\\n                        images.\n        \"\"\"\n        assert gt_bboxes_ignore_list is None, \\\n            'Only supports for gt_bboxes_ignore setting to None.'\n\n        # format the inputs\n        gt_labels = map_gt_labels_3d\n        gt_lines = map_gt_bboxes_3d\n\n        lines_pred = preds['lines']\n\n        (labels_list, label_weights_list,\n        lines_targets_list, lines_weights_list,\n        pos_inds_list, neg_inds_list,pos_gt_inds_list) = multi_apply(\n            self._get_target_single, preds['scores'], lines_pred,\n            gt_labels, gt_lines, valid_map, gt_bboxes_ignore=gt_bboxes_ignore_list)\n        \n        num_total_pos = sum((inds.numel() for inds in pos_inds_list))\n        num_total_neg = sum((inds.numel() for inds in neg_inds_list))\n        new_gts = dict(\n            labels=labels_list, # list[Tensor(num_q, )], length=bs\n            label_weights=label_weights_list, # list[Tensor(num_q, )], length=bs, all ones\n            lines=lines_targets_list, # list[Tensor(num_q, 2*num_pts)], length=bs\n            lines_weights=lines_weights_list, # list[Tensor(num_q, 2*num_pts)], length=bs\n        )\n\n        return new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list\n\n    # @force_fp32(apply_to=('preds', 'gts'))\n    def loss_single(self,\n                    preds,\n                    map_gt_bboxes_3d,\n                    map_gt_labels_3d,\n                    valid_map,\n                    gt_bboxes_ignore_list=None,\n                    reduction='none'):\n        \"\"\"\n            Loss function for outputs from a single decoder layer of a single\n            feature level.\n            Args:\n                preds (dict): \n                    - lines (Tensor): shape (bs, num_queries, 2*num_points)\n                    - scores (Tensor): shape (bs, num_queries, num_class_channels)\n                gts 
(dict):\n                    - class_label (list[Tensor]): tensor shape (num_gts, )\n                    - lines (list[Tensor]): tensor shape (num_gts, 2*num_points)\n                gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                    boxes which can be ignored for each image. Default None.\n            Returns:\n                dict[str, Tensor]: A dictionary of loss components for outputs from\n                    a single decoder layer.\n        \"\"\"\n\n        # Get target for each sample\n        new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list =\\\n            self.get_targets(preds, map_gt_bboxes_3d,\n             map_gt_labels_3d, valid_map, gt_bboxes_ignore_list)\n\n        # Batched all data\n        # for k, v in new_gts.items():\n        #     new_gts[k] = torch.stack(v, dim=0) # tensor (bs, num_q, ...)\n\n        # construct weighted avg_factor to match with the official DETR repo\n        cls_avg_factor = num_total_pos * 1.0 + \\\n            num_total_neg * self.bg_cls_weight\n        \n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                preds['scores'][0].new_tensor([cls_avg_factor]))\n        cls_avg_factor = max(cls_avg_factor, 1)\n\n        # Classification loss\n        # since the inputs needs the second dim is the class dim, we permute the prediction.\n\n        pred_scores = preds['scores'].flatten(0, 1) # (bs*num_q, cls_out_channles)\n        cls_scores = pred_scores.reshape(-1, self.cls_out_channels) # (bs*num_q, cls_out_channels)\n        cls_labels = torch.cat(new_gts['labels'], dim=0).reshape(-1) # (bs*num_q, )\n        cls_weights = torch.cat(new_gts['label_weights'], dim=0).reshape(-1) # (bs*num_q, )\n\n        loss_cls = self.loss_cls(\n            cls_scores, cls_labels, cls_weights, avg_factor=cls_avg_factor)\n        \n        # Compute the average number of gt boxes across all gpus, for\n        # normalization purposes\n        num_total_pos = loss_cls.new_tensor([num_total_pos])\n        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()\n\n        pred_lines = preds['lines'].flatten(0, 1)\n        gt_lines = torch.cat(new_gts['lines'], dim=0)\n        line_weights = torch.cat(new_gts['lines_weights'], dim=0)\n\n        assert len(pred_lines) == len(gt_lines)\n        assert len(gt_lines) == len(line_weights)\n\n        loss_reg = self.loss_reg(\n            pred_lines, gt_lines, line_weights, avg_factor=num_total_pos)\n\n        loss_dict = dict(\n            loss_cls=loss_cls,\n            loss_reg=loss_reg,\n        )\n\n        return loss_dict, pos_inds_list, pos_gt_inds_list, new_gts['lines']\n    \n    @force_fp32(apply_to=('map_gt_bboxes_3d', 'preds'))\n    def loss(self,\n             map_gt_bboxes_3d,\n             map_gt_labels_3d,\n             preds,\n             img_metas,\n             gt_bboxes_ignore=None,\n             reduction='mean'):\n        \"\"\"\n            Loss Function.\n            Args:\n                gts (list[dict]): list length: num_layers\n                    dict {\n                        'label': list[tensor(num_gts, )], list length: batchsize,\n                        'line': list[tensor(num_gts, 2*num_points)], list length: batchsize,\n                        ...\n                    }\n                preds (list[dict]): list length: num_layers\n                    dict {\n                        'lines': tensor(bs, num_queries, 2*num_points),\n                        'scores': tensor(bs, 
num_queries, class_out_channels),\n                    }\n                    \n                gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                    which can be ignored for each image. Default None.\n            Returns:\n                dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        \n        assert gt_bboxes_ignore is None, \\\n                f'{self.__class__.__name__} only supports ' \\\n                f'for gt_bboxes_ignore setting to None.'\n        \n\n        # Since there might have multi layer\n        num_dec_layers = len(preds)\n        map_gt_bboxes_3d_list = [map_gt_bboxes_3d for _ in range(num_dec_layers)]\n        map_gt_labels_3d_list = [map_gt_labels_3d for _ in range(num_dec_layers)]\n\n        valid_map = torch.tensor([each['has_valid_map'] for each in img_metas], device=map_gt_bboxes_3d[0].device)\n        valid_map_list = [valid_map for _ in range(num_dec_layers)]\n\n        losses, pos_inds_lists, pos_gt_inds_lists, gt_lines_list = multi_apply(\n            self.loss_single, preds, map_gt_bboxes_3d_list,\n             map_gt_labels_3d_list , valid_map_list, reduction=reduction)\n\n        # Format the losses\n        loss_dict = dict()\n        # loss from the last decoder layer\n        for k, v in losses[-1].items():\n            loss_dict[k] = v\n        \n        # Loss from other decoder layers\n        num_dec_layer = 0\n        for loss in losses[:-1]:\n            for k, v in loss.items():\n                loss_dict[f'd{num_dec_layer}.{k}'] = v\n            num_dec_layer += 1\n\n        return loss_dict, pos_inds_lists, pos_gt_inds_lists, gt_lines_list\n    \n    def get_bboxes(self, preds_dict, img_metas, thr=0.0):\n\n        preds_dict = preds_dict[-1]\n        lines = preds_dict['lines'] # List[Tensor(num_queries, 2*num_points)]\n        bs = len(lines)\n        scores = preds_dict['scores'] # (bs, num_queries, 3)\n        prop_mask = preds_dict['prop_mask']\n\n        results = []\n        for i in range(bs):\n            tmp_vectors = lines[i]\n            tmp_prop_mask = prop_mask[i]\n            num_preds, num_points2 = tmp_vectors.shape\n            tmp_vectors = tmp_vectors.view(num_preds, num_points2//2, 2)\n            # focal loss\n            if self.loss_cls.use_sigmoid:\n                tmp_scores, tmp_labels = scores[i].max(-1)\n                tmp_scores = tmp_scores.sigmoid()\n                pos = tmp_scores > thr\n            else:\n                assert self.num_classes + 1 == self.cls_out_channels\n                tmp_scores, tmp_labels = scores[i].max(-1)\n                bg_cls = self.cls_out_channels\n                pos = tmp_labels != bg_cls\n\n            tmp_vectors = tmp_vectors[pos]\n            tmp_scores = tmp_scores[pos]\n            tmp_labels = tmp_labels[pos]\n            tmp_prop_mask = tmp_prop_mask[pos]\n            \n            tmp_vectors = tmp_vectors * self.roi_size + self.origin\n\n            if len(tmp_scores) == 0:\n                single_result = {\n                'map_pts_3d': [],\n                'map_scores_3d': [],\n                'map_labels_3d': [],\n                'prop_mask': [],\n                'index': img_metas[0]['index']\n            }\n            else:\n                single_result = {\n                    'map_pts_3d': tmp_vectors.detach().cpu(), # .numpy(),\n                    'map_scores_3d': tmp_scores.detach().cpu(), # .numpy(),\n                    'map_labels_3d': tmp_labels.detach().cpu(), #.numpy(),\n                    
'prop_mask': tmp_prop_mask.detach().cpu(), # .numpy(),\n                    'index': img_metas[0]['index'],\n                    'gt_lane_in_global': preds_dict['gt_lane_in_global'].cpu().numpy(),\n                    'gt_lane_label': preds_dict['gt_lane_label'].cpu().numpy(),\n                }\n            results.append(single_result)\n        \n        return results\n\n    def train(self, *args, **kwargs):\n        super().train(*args, **kwargs)\n        for k, v in self.__dict__.items():\n            if isinstance(v, StreamTensorMemory):\n                v.train(*args, **kwargs)\n    \n    def eval(self):\n        super().eval()\n        for k, v in self.__dict__.items():\n            if isinstance(v, StreamTensorMemory):\n                v.eval()\n\n    def forward(self, *args, return_loss=True, **kwargs):\n        if return_loss:\n            return self.forward_train(*args, **kwargs)\n        else:\n            return self.forward_test(*args, **kwargs)"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\nimport copy\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import build_activation_layer, build_norm_layer, xavier_init\nfrom mmcv.cnn.bricks.registry import (TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import (BaseTransformerLayer,\n                                         TransformerLayerSequence,\n                                         build_transformer_layer)\nfrom mmcv.runner.base_module import BaseModule, ModuleList\n\nfrom mmdet.models.utils.builder import TRANSFORMER\n\nfrom mmdet.models.utils.transformer import Transformer\n\nfrom .CustomMSDeformableAttention import CustomMSDeformableAttention\nfrom mmdet.models.utils.transformer import inverse_sigmoid\n    \n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass MapTransformerDecoder_new(BaseModule):\n    \"\"\"Implements the decoder in DETR transformer.\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. Default:\n            `LN`.\n    \"\"\"\n\n    def __init__(self, \n                 transformerlayers=None, \n                 num_layers=None, \n                 prop_add_stage=0,\n                 return_intermediate=True,\n                 fix=False,\n                 init_cfg=None):\n        \n        super().__init__(init_cfg)\n        if isinstance(transformerlayers, dict):\n            transformerlayers = [\n                copy.deepcopy(transformerlayers) for _ in range(num_layers)\n            ]\n        else:\n            assert isinstance(transformerlayers, list) and \\\n                   len(transformerlayers) == num_layers\n        self.num_layers = num_layers\n        self.layers = ModuleList()\n        for i in range(num_layers):\n            self.layers.append(build_transformer_layer(transformerlayers[i]))\n        self.embed_dims = self.layers[0].embed_dims\n        self.pre_norm = self.layers[0].pre_norm\n        self.return_intermediate = return_intermediate\n        self.prop_add_stage = prop_add_stage\n        self.fix = fix\n        assert prop_add_stage >= 0  and prop_add_stage < num_layers\n\n    def forward(self,\n                query,\n                prop_query,\n                key,\n                value,\n                query_pos,\n                key_padding_mask,\n                query_key_padding_mask,\n                reference_points,\n                prop_reference_points,\n                spatial_shapes,\n                level_start_index,\n                reg_branches,\n                cls_branches,\n                is_first_frame_list,\n                predict_refine,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoder`.\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n            reference_points (Tensor): The reference\n                points of offset. has shape (bs, num_query, num_points, 2).\n            valid_ratios (Tensor): The radios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n            reg_branch: (obj:`nn.ModuleList`): Used for\n                refining the regression results. 
Only would\n                be passed when with_box_refine is True,\n                otherwise would be passed a `None`.\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        num_queries, bs, embed_dims = query.shape\n        output = query\n        intermediate = []\n        intermediate_reference_points = []\n        for lid, layer in enumerate(self.layers):\n            if lid == self.prop_add_stage and prop_query is not None and prop_reference_points is not None:\n                bs, topk, embed_dims = prop_query.shape\n                output = output.permute(1, 0, 2)\n                with torch.no_grad():\n                    tmp_scores, _ = cls_branches[lid](output).max(-1) # (bs, num_q)\n                new_query = []\n                new_refpts = []\n                for i in range(bs):\n                    if is_first_frame_list[i]:\n                        new_query.append(output[i])\n                        new_refpts.append(reference_points[i])\n                    else:\n                        _, valid_idx = torch.topk(tmp_scores[i], k=num_queries-topk, dim=-1)\n                        new_query.append(torch.cat([prop_query[i], output[i][valid_idx]], dim=0))\n                        new_refpts.append(torch.cat([prop_reference_points[i], reference_points[i][valid_idx]], dim=0))\n                \n                output = torch.stack(new_query).permute(1, 0, 2)\n                reference_points = torch.stack(new_refpts)\n                assert list(output.shape) == [num_queries, bs, embed_dims]\n\n            tmp = reference_points.clone()\n            if self.fix:\n                tmp[..., 1:2] = 1.0 - reference_points[..., 1:2] # reverse y-axis\n            # reference_points = tmp\n            \n            output = layer(\n                output,\n                key,\n                value,\n                query_pos=query_pos,\n                key_padding_mask=key_padding_mask,\n                reference_points=tmp,\n                spatial_shapes=spatial_shapes,\n                level_start_index=level_start_index,\n                query_key_padding_mask=None,\n                **kwargs)\n            \n            reg_points = reg_branches[lid](output.permute(1, 0, 2)) # (bs, num_q, 2*num_points)\n            bs, num_queries, num_points2 = reg_points.shape\n            reg_points = reg_points.view(bs, num_queries, num_points2//2, 2) # range (0, 1)\n            \n            if predict_refine:\n                new_reference_points = reg_points + inverse_sigmoid(\n                    reference_points\n                )\n                new_reference_points = new_reference_points.sigmoid()\n            else:\n                new_reference_points = reg_points.sigmoid() # (bs, num_q, num_points, 2)\n            \n            reference_points = new_reference_points.clone().detach()\n\n            if self.return_intermediate:\n                intermediate.append(output.permute(1, 0, 2)) # [(bs, num_q, embed_dims)]\n                intermediate_reference_points.append(new_reference_points) # (bs, num_q, num_points, 2)\n\n        if self.return_intermediate:\n            return intermediate, intermediate_reference_points\n\n        return output, reference_points\n\n@TRANSFORMER_LAYER.register_module()\nclass MapTransformerLayer(BaseTransformerLayer):\n    \"\"\"Base `TransformerLayer` for 
vision transformer.\n\n    It can be built from `mmcv.ConfigDict` and support more flexible\n    customization, for example, using any number of `FFN or LN ` and\n    use different kinds of `attention` by specifying a list of `ConfigDict`\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\n    when you specifying `norm` as the first element of `operation_order`.\n    More details about the `prenorm`: `On Layer Normalization in the\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\n\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for `self_attention` or `cross_attention` modules,\n            The order of the configs in the list should be consistent with\n            corresponding attentions in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config. Default: None.\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for FFN, The order of the configs in the list should be\n            consistent with corresponding ffn in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config.\n        operation_order (tuple[str]): The execution order of operation\n            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').\n            Support `prenorm` when you specifying first element as `norm`.\n            Default：None.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        batch_first (bool): Key, Query and Value are shape\n            of (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs=None,\n                 ffn_cfgs=dict(\n                     type='FFN',\n                     embed_dims=256,\n                     feedforward_channels=1024,\n                     num_fcs=2,\n                     ffn_drop=0.,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                 ),\n                 operation_order=None,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None,\n                 batch_first=False,\n                 **kwargs):\n\n        super().__init__(\n            attn_cfgs=attn_cfgs,\n            ffn_cfgs=ffn_cfgs,\n            operation_order=operation_order,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            batch_first=batch_first,\n            **kwargs\n        )\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                memory_query=None,\n                query_pos=None,\n                key_pos=None,\n                attn_masks=None,\n                query_key_padding_mask=None,\n                key_padding_mask=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoderLayer`.\n\n        **kwargs contains some specific arguments of attentions.\n\n        Args:\n            query (Tensor): The input query with shape\n                [num_queries, bs, embed_dims] if\n                self.batch_first is False, else\n                [bs, num_queries embed_dims].\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims] if self.batch_first is False, else\n                [bs, num_keys, embed_dims] .\n            value (Tensor): The value tensor with same shape as `key`.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`.\n                Default: None.\n            attn_masks (List[Tensor] | None): 2D Tensor used in\n                calculation of corresponding attention. The length of\n                it should equal to the number of `attention` in\n                `operation_order`. Default: None.\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_queries]. Only used in `self_attn` layer.\n                Defaults to None.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_keys]. 
Default: None.\n\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        norm_index = 0\n        attn_index = 0\n        ffn_index = 0\n        identity = query\n        if attn_masks is None:\n            attn_masks = [None for _ in range(self.num_attn)]\n        elif isinstance(attn_masks, torch.Tensor):\n            attn_masks = [\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\n            ]\n            warnings.warn(f'Use same attn_mask in all attentions in '\n                          f'{self.__class__.__name__} ')\n        else:\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\n                        f'attn_masks {len(attn_masks)} must be equal ' \\\n                        f'to the number of attention in ' \\\n                        f'operation_order {self.num_attn}'\n\n        for layer in self.operation_order:\n            if layer == 'self_attn':\n                if memory_query is None:\n                    temp_key = temp_value = query\n                else:\n                    temp_key = temp_value = torch.cat([memory_query, query], dim=0)\n                \n                query = self.attentions[attn_index](\n                    query,\n                    temp_key,\n                    temp_value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=query_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=query_key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'norm':\n                query = self.norms[norm_index](query)\n                norm_index += 1\n\n            elif layer == 'cross_attn':\n                query = self.attentions[attn_index](\n                    query,\n                    key,\n                    value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=key_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'ffn':\n                query = self.ffns[ffn_index](\n                    query, identity if self.pre_norm else None)\n                ffn_index += 1\n\n        return query\n\n@TRANSFORMER.register_module()\nclass MapTransformer(Transformer):\n    \"\"\"Implements the DeformableDETR transformer.\n    Args:\n        as_two_stage (bool): Generate query from encoder features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. 
Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 num_feature_levels=1,\n                 num_points=20,\n                 coord_dim=2,\n                 **kwargs):\n        super().__init__(**kwargs)\n        self.num_feature_levels = num_feature_levels\n        self.embed_dims = self.encoder.embed_dims\n        self.coord_dim = coord_dim\n        self.num_points = num_points\n        self.init_layers()\n\n    def init_layers(self):\n        \"\"\"Initialize layers of the DeformableDetrTransformer.\"\"\"\n        # self.level_embeds = nn.Parameter(\n        #     torch.Tensor(self.num_feature_levels, self.embed_dims))\n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if isinstance(m, CustomMSDeformableAttention):\n                m.init_weights()\n\n        # normal_(self.level_embeds)\n\n    def forward(self,\n                mlvl_feats,\n                mlvl_masks,\n                query_embed,\n                mlvl_pos_embeds,\n                init_reference_points,\n                reg_branches=None,\n                cls_branches=None,\n                memory_query=None,\n                prop_query=None,\n                prop_reference_points=None,\n                **kwargs):\n        \"\"\"Forward function for `Transformer`.\n        Args:\n            mlvl_feats (list(Tensor)): Input queries from\n                different level. Each element has shape\n                [bs, embed_dims, h, w].\n            mlvl_masks (list(Tensor)): The key_padding_mask from\n                different level used for encoder and decoder,\n                each element has shape  [bs, h, w].\n            query_embed (Tensor): The query embedding for decoder,\n                with shape [num_query, c].\n            mlvl_pos_embeds (list(Tensor)): The positional encoding\n                of feats from different level, has the shape\n                 [bs, embed_dims, h, w].\n            reg_branches (obj:`nn.ModuleList`): Regression heads for\n                feature maps from each decoder layer. Only would\n                be passed when\n                `with_box_refine` is True. Default to None.\n            cls_branches (obj:`nn.ModuleList`): Classification heads\n                for feature maps from each decoder layer. Only would\n                 be passed when `as_two_stage`\n                 is True. Default to None.\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n                - inter_states: Outputs from decoder. If\n                    return_intermediate_dec is True output has shape \\\n                      (num_dec_layers, bs, num_query, embed_dims), else has \\\n                      shape (1, bs, num_query, embed_dims).\n                - init_reference_out: The initial value of reference \\\n                    points, has shape (bs, num_queries, 4).\n                - inter_references_out: The internal value of reference \\\n                    points in decoder, has shape \\\n                    (num_dec_layers, bs,num_query, embed_dims)\n                - enc_outputs_class: The classification score of \\\n                    proposals generated from \\\n                    encoder's feature maps, has shape \\\n                    (batch, h*w, num_classes). 
\\\n                    Only would be returned when `as_two_stage` is True, \\\n                    otherwise None.\n                - enc_outputs_coord_unact: The regression results \\\n                    generated from encoder's feature maps., has shape \\\n                    (batch, h*w, 4). Only would \\\n                    be returned when `as_two_stage` is True, \\\n                    otherwise None.\n        \"\"\"\n\n        feat_flatten = []\n        mask_flatten = []\n        # lvl_pos_embed_flatten = []\n        spatial_shapes = []\n        for lvl, (feat, mask, pos_embed) in enumerate(\n                zip(mlvl_feats, mlvl_masks, mlvl_pos_embeds)):\n            bs, c, h, w = feat.shape\n            spatial_shape = (h, w)\n            spatial_shapes.append(spatial_shape)\n            feat = feat.flatten(2).transpose(1, 2)\n            mask = mask.flatten(1)\n            # pos_embed = pos_embed.flatten(2).transpose(1, 2)\n            # lvl_pos_embed = pos_embed + self.level_embeds[lvl].view(1, 1, -1)\n            # lvl_pos_embed_flatten.append(lvl_pos_embed)\n            feat_flatten.append(feat)\n            mask_flatten.append(mask)\n        feat_flatten = torch.cat(feat_flatten, 1)\n        mask_flatten = torch.cat(mask_flatten, 1)\n        # lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1)\n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=feat_flatten.device)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1, )), spatial_shapes.prod(1).cumsum(0)[:-1]))\n        \n        feat_flatten = feat_flatten.permute(1, 0, 2)  # (H*W, bs, embed_dims)\n\n        # decoder\n        query = query_embed.permute(1, 0, 2) # (num_q, bs, embed_dims)\n        if memory_query is not None:\n            memory_query = memory_query.permute(1, 0, 2)\n        inter_states, inter_references = self.decoder(\n            query=query,\n            key=None,\n            value=feat_flatten,\n            query_pos=None,\n            key_padding_mask=mask_flatten,\n            reference_points=init_reference_points,\n            spatial_shapes=spatial_shapes,\n            level_start_index=level_start_index,\n            reg_branches=reg_branches,\n            cls_branches=cls_branches,\n            memory_query=memory_query,\n            prop_query=prop_query,\n            prop_reference_points=prop_reference_points,\n            **kwargs)\n\n        return inter_states, init_reference_points, inter_references\n\n\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass PlaceHolderEncoder(nn.Module):\n\n    def __init__(self, *args, embed_dims=None, **kwargs):\n        super(PlaceHolderEncoder, self).__init__()\n        self.embed_dims = embed_dims\n\n    def forward(self, *args, query=None, **kwargs):\n        \n        return query"
  },
  {
    "path": "mmdet3d/models/fbbev/streammapnet/utils.py",
    "content": "import torch\nimport copy\nimport math\nimport torch\nimport torch.nn as nn \nimport numpy as np\nfrom mmcv.cnn import bias_init_with_prob, xavier_init\n\n\nclass StreamTensorMemory(object):\n    def __init__(self, batch_size):\n        self.train_bs = batch_size\n        self.training = True\n        self.bs = self.train_bs\n\n        self.train_memory_list = [None for i in range(self.bs)]\n        self.train_img_metas_memory = [None for i in range(self.bs)]\n\n        self.test_memory_list = [None] # bs = 1 when testing\n        self.test_img_metas_memory = [None]\n    \n    @property\n    def memory_list(self):\n        if self.training:\n            return self.train_memory_list\n        else:\n            return self.test_memory_list\n    \n    @property\n    def img_metas_memory(self):\n        if self.training:\n            return self.train_img_metas_memory\n        else:\n            return self.test_img_metas_memory\n\n    def update(self, memory, img_metas):\n        for i in range(self.bs):\n            self.memory_list[i] = memory[i].clone().detach()\n            self.img_metas_memory[i] = copy.deepcopy(img_metas[i])\n        \n    def reset_single(self, idx):\n        self.memory_list[idx] = None\n        self.img_metas_memory[idx] = None\n\n    def get(self, img_metas):\n        '''\n        img_metas: list[img_metas]\n        '''\n\n        tensor_list = []\n        img_metas_list = []\n        is_first_frame_list = []\n        \n        for i in range(self.bs):\n            if not self.img_metas_memory[i]:\n                is_first_frame = True\n            else:\n                is_first_frame = (img_metas[i]['scene_name'] != self.img_metas_memory[i]['scene_name'])\n\n            if is_first_frame:\n                self.reset_single(i)\n\n            tensor_list.append(self.memory_list[i])\n            img_metas_list.append(self.img_metas_memory[i])\n            is_first_frame_list.append(is_first_frame)\n\n        result = {\n            'tensor': tensor_list,\n            'img_metas': img_metas_list,\n            'is_first_frame': is_first_frame_list,\n        }\n        \n        return result\n    \n    def train(self, mode=True):\n        self.training = mode\n        if mode:\n            self.bs = self.train_bs\n        else:\n            self.bs = 1\n\n    def eval(self):\n        self.train(False)\n\n\n\nclass MotionMLP(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=512, identity=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.identity = identity\n\n        self.fc = nn.Sequential(\n            nn.Linear(c_dim + f_dim, 2*f_dim),\n            nn.LayerNorm(2*f_dim),\n            nn.ReLU(),\n            nn.Linear(2*f_dim, f_dim)\n        )\n        self.init_weights()\n\n    def init_weights(self):\n        for m in self.fc:\n            for param in m.parameters():\n                if param.dim() > 1:\n                    if self.identity:\n                        nn.init.zeros_(param)\n                    else:\n                        nn.init.xavier_uniform_(param)\n\n    def forward(self, x, c):\n        xc = torch.cat([x, c], dim=-1)\n        out = self.fc(xc)\n\n        if self.identity:\n            out = out + x\n        \n        return out"
  },
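  {
    "path": "examples/stream_tensor_memory_demo.py",
    "content": "# Illustrative sketch (hypothetical file, not part of the original repository):\n# demonstrates the scene-change reset protocol of StreamTensorMemory from\n# mmdet3d/models/fbbev/streammapnet/utils.py, using made-up tensors and scene names.\nimport torch\nfrom mmdet3d.models.fbbev.streammapnet.utils import StreamTensorMemory\n\nmemory = StreamTensorMemory(batch_size=1)\nmemory.eval()  # test-time memory keeps a single slot\n\n# First frame of a scene: nothing is cached yet, so is_first_frame is True.\nout = memory.get([dict(scene_name='scene-0001')])\nassert out['is_first_frame'] == [True] and out['tensor'] == [None]\n\n# Cache the per-sample query features produced for this frame.\nmemory.update([torch.zeros(10, 256)], [dict(scene_name='scene-0001')])\n\n# Same scene on the next frame: the cached tensor is returned for propagation.\nout = memory.get([dict(scene_name='scene-0001')])\nassert out['is_first_frame'] == [False] and out['tensor'][0].shape == (10, 256)\n\n# A new scene resets the slot before anything is returned.\nout = memory.get([dict(scene_name='scene-0002')])\nassert out['is_first_frame'] == [True] and out['tensor'] == [None]\n"
  },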
  {
    "path": "mmdet3d/models/fbbev/streampetr/__init__.py",
    "content": "from .streampetr_v2 import SparseHead4BEV # ok\nfrom .petr_transformer import * # ok\nfrom .hungarian_assigner_2d import *\nfrom .hungarian_assigner_3d import *\nfrom .match_cost import BBox3DL1Cost\nfrom .nms_free_coder import NMSFreeCoder"
  },
  {
    "path": "mmdet3d/models/fbbev/streampetr/hungarian_assigner_2d.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Shihao Wang\n# ---------------------------------------------\nimport torch\n\nfrom mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.assigners import AssignResult\nfrom mmdet.core.bbox.assigners import BaseAssigner\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom mmdet.core import bbox_cxcywh_to_xyxy\n\ntry:\n    from scipy.optimize import linear_sum_assignment\nexcept ImportError:\n    linear_sum_assignment = None\n\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssigner2D(BaseAssigner):\n    \"\"\"Computes one-to-one matching between predictions and ground truth.\n\n    This class computes an assignment between the targets and the predictions\n    based on the costs. The costs are weighted sum of three components:\n    classification cost, regression L1 cost and regression iou cost. The\n    targets don't include the no_object, so generally there are more\n    predictions than targets. After the one-to-one matching, the un-matched\n    are treated as backgrounds. Thus each query prediction will be assigned\n    with `0` or a positive integer indicating the ground truth index:\n\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        cls_weight (int | float, optional): The scale factor for classification\n            cost. Default 1.0.\n        bbox_weight (int | float, optional): The scale factor for regression\n            L1 cost. Default 1.0.\n        iou_weight (int | float, optional): The scale factor for regression\n            iou cost. Default 1.0.\n        iou_calculator (dict | optional): The config for the iou calculation.\n            Default type `BboxOverlaps2D`.\n        iou_mode (str | optional): \"iou\" (intersection over union), \"iof\"\n                (intersection over foreground), or \"giou\" (generalized\n                intersection over union). Default \"giou\".\n    \"\"\"\n\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0),\n                 centers2d_cost=dict(type='BBox3DL1Cost', weight=1.0)):\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n        self.iou_cost = build_match_cost(iou_cost)\n        self.centers2d_cost = build_match_cost(centers2d_cost)\n\n    def assign(self,\n               bbox_pred,\n               cls_pred,\n               pred_centers2d,\n               gt_bboxes,\n               gt_labels,\n               centers2d,\n               img_meta,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"Computes one-to-one matching based on the weighted costs.\n\n        This method assign each query prediction to a ground truth or\n        background. The `assigned_gt_inds` with -1 means don't care,\n        0 means negative sample, and positive number is the index (1-based)\n        of assigned gt.\n        The assignment is done in the following steps, the order matters.\n\n        1. assign every prediction to -1\n        2. compute the weighted costs\n        3. do Hungarian matching on CPU based on the costs\n        4. 
assign all to 0 (background) first, then for each matched pair\n           between predictions and gts, treat this prediction as foreground\n           and assign the corresponding gt index (plus 1) to it.\n\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. Shape\n                [num_query, 4].\n            cls_pred (Tensor): Predicted classification logits, shape\n                [num_query, num_class].\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n            img_meta (dict): Meta information for current image.\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`. Default None.\n            eps (int | float, optional): A value added to the denominator for\n                numerical stability. Default 1e-7.\n\n        Returns:\n            :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),\n                                              -1,\n                                              dtype=torch.long)\n        assigned_labels = bbox_pred.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels)\n        img_h, img_w, _ = img_meta['pad_shape']\n        factor = gt_bboxes.new_tensor([img_w, img_h, img_w,\n                                       img_h]).unsqueeze(0)\n\n        # 2. compute the weighted costs\n        # classification and bboxcost.\n        cls_cost = self.cls_cost(cls_pred, gt_labels)\n        # regression L1 cost\n        normalize_gt_bboxes = gt_bboxes / factor\n        reg_cost = self.reg_cost(bbox_pred, normalize_gt_bboxes)\n        # regression iou cost, defaultly giou is used in official DETR.\n        bboxes = bbox_cxcywh_to_xyxy(bbox_pred) * factor\n        iou_cost = self.iou_cost(bboxes, gt_bboxes)\n\n        # center2d L1 cost\n        normalize_centers2d = centers2d / factor[:, 0:2]\n        centers2d_cost = self.centers2d_cost(pred_centers2d, normalize_centers2d)\n\n        # weighted sum of above four costs\n        cost = cls_cost + reg_cost + iou_cost + centers2d_cost\n        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)\n        # 3. 
do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        if linear_sum_assignment is None:\n            raise ImportError('Please run \"pip install scipy\" '\n                              'to install scipy first.')\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            bbox_pred.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            bbox_pred.device)\n\n        # 4. assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n        return AssignResult(\n            num_gts, assigned_gt_inds, None, labels=assigned_labels)\n"
  },
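  {
    "path": "examples/hungarian_assignment_demo.py",
    "content": "# Illustrative sketch (hypothetical file, not part of the original repository):\n# a minimal walk-through of the four assignment steps documented in\n# HungarianAssigner2D.assign, using a toy cost matrix instead of real boxes.\nimport torch\nfrom scipy.optimize import linear_sum_assignment\n\nnum_queries = 5\n# Toy matching cost (rows: predictions, cols: ground truths); in the real\n# assigner this is the weighted sum of cls/reg/IoU/centers2d costs.\ncost = torch.tensor([[0.9, 0.1],\n                     [0.2, 0.8],\n                     [0.7, 0.6],\n                     [0.5, 0.4],\n                     [0.3, 0.2]])\n\n# 1. assign every prediction to -1 (ignore) by default\nassigned_gt_inds = cost.new_full((num_queries,), -1, dtype=torch.long)\n\n# 2./3. Hungarian matching on CPU over the (detached) cost matrix\nrow_inds, col_inds = linear_sum_assignment(cost.detach().cpu().numpy())\nrow_inds = torch.from_numpy(row_inds)\ncol_inds = torch.from_numpy(col_inds)\n\n# 4. everything becomes background (0) first; matched queries then get the\n#    1-based index of their ground truth\nassigned_gt_inds[:] = 0\nassigned_gt_inds[row_inds] = col_inds + 1\nprint(assigned_gt_inds)  # tensor([2, 1, 0, 0, 0])\n"
  },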
  {
    "path": "mmdet3d/models/fbbev/streampetr/hungarian_assigner_3d.py",
    "content": "# ------------------------------------------------------------------------\n# Modified from DETR3D (https://github.com/WangYueFt/detr3d)\n# Copyright (c) 2021 Wang, Yue\n# ------------------------------------------------------------------------\nimport torch\nfrom mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.assigners import AssignResult\nfrom mmdet.core.bbox.assigners import BaseAssigner\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom .streampetr_utils import normalize_bbox\n\ntry:\n    from scipy.optimize import linear_sum_assignment\nexcept ImportError:\n    linear_sum_assignment = None\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssigner3D(BaseAssigner):\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoUCost', weight=0.0),\n                 pc_range=None):\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n        self.iou_cost = build_match_cost(iou_cost)\n        self.pc_range = pc_range\n\n    def assign(self,\n               bbox_pred,\n               cls_pred,\n               gt_bboxes,\n               gt_labels,\n               gt_bboxes_ignore=None,\n               code_weights=None,\n               with_velo=False,\n               eps=1e-7):\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)\n        # 1. assign -1 by default\n        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),\n                                              -1,\n                                              dtype=torch.long)\n        assigned_labels = bbox_pred.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels)         \n        # 2. compute the weighted costs\n        # classification and bboxcost.\n        cls_cost = self.cls_cost(cls_pred, gt_labels)\n        # regression L1 cost\n        normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)\n        if code_weights is not None:\n            bbox_pred = bbox_pred * code_weights\n            normalized_gt_bboxes = normalized_gt_bboxes * code_weights\n        \n        if with_velo:\n            reg_cost = self.reg_cost(bbox_pred, normalized_gt_bboxes)\n        else:\n            reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8])\n      \n        # weighted sum of above two costs\n        cost = cls_cost + reg_cost\n        \n        # 3. 
do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        if linear_sum_assignment is None:\n            raise ImportError('Please run \"pip install scipy\" '\n                              'to install scipy first.')\n        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            bbox_pred.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            bbox_pred.device)\n\n        # 4. assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n        return AssignResult(\n            num_gts, assigned_gt_inds, None, labels=assigned_labels)                       "
  },
  {
    "path": "mmdet3d/models/fbbev/streampetr/match_cost.py",
    "content": "import torch\nfrom mmdet.core.bbox.match_costs.builder import MATCH_COST\n\n@MATCH_COST.register_module()\nclass BBox3DL1Cost(object):\n    \"\"\"BBox3DL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def __call__(self, bbox_pred, gt_bboxes):\n        \"\"\"\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. Shape\n                [num_query, 4].\n            gt_bboxes (Tensor): Ground truth boxes with normalized\n                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].\n        Returns:\n            torch.Tensor: bbox_cost value with weight\n        \"\"\"\n        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)\n        return bbox_cost * self.weight\n\n"
  },
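  {
    "path": "examples/bbox3d_l1_cost_demo.py",
    "content": "# Illustrative sketch (hypothetical file, not part of the original repository):\n# BBox3DL1Cost is a pairwise L1 distance (torch.cdist with p=1) between\n# normalized predictions and targets; the values below are made up.\nimport torch\nfrom mmdet3d.models.fbbev.streampetr.match_cost import BBox3DL1Cost\n\ncost_fn = BBox3DL1Cost(weight=0.25)\nbbox_pred = torch.tensor([[0.1, 0.2], [0.6, 0.7]])  # [num_query, code_size]\ngt_bboxes = torch.tensor([[0.1, 0.1], [0.9, 0.9]])  # [num_gt, code_size]\ncost = cost_fn(bbox_pred, gt_bboxes)                 # [num_query, num_gt]\nprint(cost)  # approx. [[0.025, 0.375], [0.275, 0.125]]\n"
  },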
  {
    "path": "mmdet3d/models/fbbev/streampetr/nms_free_coder.py",
    "content": "import torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom .streampetr_utils import denormalize_bbox\n\n\n@BBOX_CODERS.register_module()\nclass NMSFreeCoder(BaseBBoxCoder):\n    \"\"\"Bbox coder for NMS-free detector.\n    Args:\n        pc_range (list[float]): Range of point cloud.\n        post_center_range (list[float]): Limit of the center.\n            Default: None.\n        max_num (int): Max number to be kept. Default: 100.\n        score_threshold (float): Threshold to filter boxes based on score.\n            Default: None.\n        code_size (int): Code size of bboxes. Default: 9\n    \"\"\"\n\n    def __init__(self,\n                 pc_range=None,\n                 voxel_size=None,\n                 post_center_range=None,\n                 max_num=100,\n                 score_threshold=None,\n                 num_classes=10):\n        \n        self.pc_range = pc_range\n        self.voxel_size = voxel_size\n        self.post_center_range = post_center_range\n        self.max_num = max_num\n        self.score_threshold = score_threshold\n        self.num_classes = num_classes\n\n    def encode(self):\n        pass\n\n    def decode_single(self, cls_scores, bbox_preds):\n        \"\"\"Decode bboxes.\n        Args:\n            cls_scores (Tensor): Outputs from the classification head, \\\n                shape [num_query, cls_out_channels]. Note \\\n                cls_out_channels should includes background.\n            bbox_preds (Tensor): Outputs from the regression \\\n                Shape [num_query, 9].\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        max_num = self.max_num\n        cls_scores = cls_scores.sigmoid()\n        scores, indexs = cls_scores.view(-1).topk(max_num)\n        labels = indexs % self.num_classes\n        bbox_index = torch.div(indexs, self.num_classes, rounding_mode='floor')\n        bbox_preds = bbox_preds[bbox_index]\n\n        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)   \n        final_scores = scores \n        final_preds = labels \n\n        # use score threshold\n        if self.score_threshold is not None:\n            thresh_mask = final_scores >= self.score_threshold\n        if self.post_center_range is not None:\n            self.post_center_range = torch.tensor(self.post_center_range, device=scores.device)\n            \n            mask = (final_box_preds[..., :3] >=\n                    self.post_center_range[:3]).all(1)\n            mask &= (final_box_preds[..., :3] <=\n                     self.post_center_range[3:]).all(1)\n\n            if self.score_threshold:\n                mask &= thresh_mask\n\n            boxes3d = final_box_preds[mask]\n            scores = final_scores[mask]\n            labels = final_preds[mask]\n            predictions_dict = {\n                'bboxes': boxes3d,\n                'scores': scores,\n                'labels': labels\n            }\n\n        else:\n            raise NotImplementedError(\n                'Need to reorganize output as a batch, only '\n                'support post_center_range is not None for now!')\n        return predictions_dict\n\n    def decode(self, preds_dicts, layer_index=-1):\n        \"\"\"Decode bboxes.\n        Args:\n            all_cls_scores (Tensor): Outputs from the classification head, \\\n                shape [nb_dec, bs, num_query, cls_out_channels]. 
Note \\\n                cls_out_channels should include background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\n                head in normalized coordinate format, \\\n                shape [nb_dec, bs, num_query, 9].\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        all_cls_scores = preds_dicts['all_cls_scores'][layer_index]\n        all_bbox_preds = preds_dicts['all_bbox_preds'][layer_index]\n        \n        batch_size = all_cls_scores.size()[0]\n        predictions_list = []\n        for i in range(batch_size):\n            predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i]))\n        return predictions_list\n"
  },
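  {
    "path": "examples/nms_free_topk_demo.py",
    "content": "# Illustrative sketch (hypothetical file, not part of the original repository):\n# the flattened top-k trick used by NMSFreeCoder.decode_single, shown on\n# random logits instead of real network outputs.\nimport torch\n\nnum_query, num_classes, max_num = 6, 3, 4\ncls_scores = torch.randn(num_query, num_classes).sigmoid()\n\n# Top-k over the flattened (query, class) grid, so a single query can in\n# principle yield several (box, label) pairs.\nscores, topk_inds = cls_scores.view(-1).topk(max_num)\nlabels = topk_inds % num_classes  # class id of each kept prediction\nbbox_index = torch.div(topk_inds, num_classes, rounding_mode='floor')  # query id\n\nassert bbox_index.max() < num_query and labels.max() < num_classes\nprint(scores.shape, labels.shape, bbox_index.shape)  # three tensors of shape [4]\n"
  },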
  {
    "path": "mmdet3d/models/fbbev/streampetr/petr_transformer.py",
    "content": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init, constant_init, build_norm_layer\nfrom mmcv.cnn.bricks.transformer import (BaseTransformerLayer,\n                                         TransformerLayerSequence,\n                                         build_transformer_layer_sequence,\n                                         build_attention,\n                                         build_feedforward_network)\nfrom mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttnFunction\nfrom mmcv.runner.base_module import BaseModule\nfrom mmcv.cnn.bricks.registry import (ATTENTION,TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmdet.models.utils.builder import TRANSFORMER\nfrom .streampetr_utils import  pos2posemb3d, bevpos2posemb\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmcv.utils import deprecated_api_warning, ConfigDict\nimport warnings\nimport copy\nfrom torch.nn import ModuleList\nimport torch.utils.checkpoint as cp\nfrom mmcv.runner import force_fp32\nfrom torch.cuda.amp import autocast\n\n# Disable warnings\nwarnings.filterwarnings(\"ignore\")\n\ndef get_ego_pos(points, pc_range):\n    if points.size(-1) == 3:\n        points = points * (pc_range[3:6] - pc_range[0:3]) + pc_range[0:3]\n    elif  points.size(-1) == 2:\n        points = points * (pc_range[3:5] - pc_range[0:2]) + pc_range[0:2]\n    return points\n\ndef get_rel_pos(points, pc_range):\n    if points.size(-1) == 3:\n        return (points - pc_range[0:3]) / (pc_range[3:6] - pc_range[0:3])\n    elif  points.size(-1) == 2:\n        return (points - pc_range[0:2]) / (pc_range[3:5] - pc_range[0:2])\n\n\n@TRANSFORMER.register_module()\nclass Detr3DTransformer(BaseModule):\n    \"\"\"Implements the Detr3D transformer.\n    Args:\n        as_two_stage (bool): Generate query from encoder features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 decoder=None,\n                 **kwargs):\n        super(Detr3DTransformer, self).__init__(**kwargs)\n        self.decoder = build_transformer_layer_sequence(decoder)\n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if hasattr(m, \"init_weight\"):\n                m.init_weight()\n\n    def forward(self,\n                query,\n                query_pos,\n                feat_flatten,\n                spatial_flatten,\n                level_start_index, \n                temp_memory, \n                temp_pos,\n                attn_masks,\n                reference_points, \n                pc_range, \n                data, \n                img_metas,\n                temp_reference_points=None,\n                reg_branches=None,\n                query_embedding=None,\n                return_intermediate_pts=False,\n                cam_params=None,\n                debug_info=None,\n                ):\n        \"\"\"Forward function for `Detr3DTransformer`.\n        Args:\n            mlvl_feats (list(Tensor)): Input queries from\n                different level. 
Each element has shape\n                [bs, embed_dims, h, w].\n            query_embed (Tensor): The query embedding for decoder,\n                with shape [num_query, c].\n            mlvl_pos_embeds (list(Tensor)): The positional encoding\n                of feats from different level, has the shape\n                 [bs, embed_dims, h, w].\n            reg_branches (obj:`nn.ModuleList`): Regression heads for\n                feature maps from each decoder layer. Only would\n                be passed when\n                `with_box_refine` is True. Default to None.\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n                - inter_states: Outputs from decoder. If\n                    return_intermediate_dec is True output has shape \\\n                      (num_dec_layers, bs, num_query, embed_dims), else has \\\n                      shape (1, bs, num_query, embed_dims).\n                - init_reference_out: The initial value of reference \\\n                    points, has shape (bs, num_queries, 4).\n                - inter_references_out: The internal value of reference \\\n                    points in decoder, has shape \\\n                    (num_dec_layers, bs,num_query, embed_dims)\n                - enc_outputs_class: The classification score of \\\n                    proposals generated from \\\n                    encoder's feature maps, has shape \\\n                    (batch, h*w, num_classes). \\\n                    Only would be returned when `as_two_stage` is True, \\\n                    otherwise None.\n                - enc_outputs_coord_unact: The regression results \\\n                    generated from encoder's feature maps., has shape \\\n                    (batch, h*w, 4). Only would \\\n                    be returned when `as_two_stage` is True, \\\n                    otherwise None.\n        \"\"\"\n        lidar2img = None #  data['lidar2img']\n        \n        inter_states = self.decoder(\n            query=query,\n            query_pos=query_pos,\n            mlvl_feats=feat_flatten,\n            temp_memory=temp_memory, \n            temp_pos=temp_pos,\n            reference_points=reference_points,\n            spatial_flatten=spatial_flatten,\n            level_start_index=level_start_index,\n            pc_range=pc_range, \n            lidar2img=lidar2img, \n            img_metas=img_metas,\n            attn_masks=attn_masks,\n            reg_branches=reg_branches,\n            query_embedding=query_embedding,\n            return_intermediate_pts=return_intermediate_pts,\n            cam_params=cam_params,\n            debug_info=debug_info,\n            temp_reference_points=temp_reference_points,\n            )\n\n        return inter_states\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass Detr3DTransformerDecoder(TransformerLayerSequence):\n    \"\"\"Implements the decoder in DETR3D transformer.\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. 
Default：\n            `LN`.\n    \"\"\"\n\n    def __init__(self, embed_dims, *args,  predict_refine=True, **kwargs):\n        self.predict_refine =predict_refine\n        super(Detr3DTransformerDecoder, self).__init__(*args, **kwargs)\n\n    def forward(self,\n                query,\n                query_pos,\n                mlvl_feats,\n                temp_memory, \n                temp_pos,\n                reference_points,\n                spatial_flatten,\n                level_start_index,\n                pc_range, \n                lidar2img, \n                img_metas,\n                attn_masks,\n                temp_reference_points=None,\n                reg_branches=None,\n                query_embedding=None,\n                return_intermediate_pts=False,\n                cam_params=None,\n                debug_info=None,\n               \n                ):\n        \"\"\"Forward function for `Detr3DTransformerDecoder`.\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n            reference_points (Tensor): The reference\n                points of offset. has shape\n                (bs, num_query, 4) when as_two_stage,\n                otherwise has shape ((bs, num_query, 2).\n            reg_branch: (obj:`nn.ModuleList`): Used for\n                refining the regression results. Only would\n                be passed when with_box_refine is True,\n                otherwise would be passed a `None`.\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        intermediate = []\n        intermediate_reference_points = []\n        ori_reference_points = reference_points.clone()\n        for lid, layer in enumerate(self.layers):\n            \n            query = layer(\n                query,\n                query_pos,\n                mlvl_feats,\n                temp_memory, \n                temp_pos,\n                reference_points,\n                spatial_flatten,\n                level_start_index,\n                pc_range, \n                lidar2img, \n                img_metas,\n                attn_masks,\n                temp_reference_points=temp_reference_points,\n                cam_params=cam_params,\n                debug_info=debug_info,\n                )\n            if reg_branches is not None:\n                ref_shape = reference_points.shape\n\n                if len(ref_shape) == 3: # Detection\n                    reg_points = reg_branches[lid](query)[..., :3].reshape(*ref_shape)\n                elif len(ref_shape) == 4: # Map\n                    reg_points = reg_branches[lid](query).reshape(*ref_shape)\n                if self.predict_refine:\n                    new_reference_points = reg_points + inverse_sigmoid(reference_points)\n                else:\n                    if len(ref_shape) == 3: # Detection predicts the offset from the initial reference_points\n                        new_reference_points = reg_points + inverse_sigmoid(ori_reference_points)\n                    elif len(ref_shape) == 4: # Map predcits absolute reference points\n                        new_reference_points = reg_points\n                new_reference_points = new_reference_points.sigmoid()\n                reference_points = new_reference_points.clone().detach()\n                
intermediate_reference_points.append(new_reference_points) # Look twice from DINO\n                if lid < len(self.layers)-1 and query_embedding is not None:\n                    if len(ref_shape) == 3: # Detection\n                        query_pos = query_embedding(pos2posemb3d(reference_points))\n                    elif len(ref_shape) == 4: # Map\n                        query_pos = query_embedding(bevpos2posemb(reference_points, 32).flatten(-2, -1))\n\n            intermediate.append(query)\n        if return_intermediate_pts:\n            return torch.stack(intermediate), torch.stack(intermediate_reference_points)\n        return torch.stack(intermediate)\n\n@TRANSFORMER_LAYER.register_module()\nclass Detr3DTemporalDecoderLayer(BaseModule):\n    \"\"\"Base `TransformerLayer` for vision transformer.\n\n    It can be built from `mmcv.ConfigDict` and supports more flexible\n    customization, for example, using any number of `FFN` or `LN` layers and\n    different kinds of `attention` by specifying a list of `ConfigDict`\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\n    when you specify `norm` as the first element of `operation_order`.\n    More details about `prenorm`: `On Layer Normalization in the\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\n\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None):\n            Configs for `self_attention` or `cross_attention` modules.\n            The order of the configs in the list should be consistent with\n            the corresponding attentions in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config. Default: None.\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None):\n            Configs for FFN. The order of the configs in the list should be\n            consistent with the corresponding ffn in operation_order.\n            If it is a dict, all of the FFN modules in operation_order\n            will be built with this config.\n        operation_order (tuple[str]): The execution order of operations\n            in the transformer, such as ('self_attn', 'norm', 'ffn', 'norm').\n            Supports `prenorm` when the first element is `norm`.\n            Default: None.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        batch_first (bool): Key, Query and Value are shape\n            of (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs=None,\n                 ffn_cfgs=dict(\n                     type='FFN',\n                     embed_dims=256,\n                     feedforward_channels=1024,\n                     num_fcs=2,\n                     ffn_drop=0.,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                 ),\n                 operation_order=None,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None,\n                 batch_first=False,\n                 with_cp=True,\n                 **kwargs):\n        super().__init__(init_cfg)\n\n        self.batch_first = batch_first\n\n        assert set(operation_order) & {\n            'self_attn', 'norm', 'ffn', 'cross_attn'} == \\\n            set(operation_order), f'The operation_order of' \\\n            f' {self.__class__.__name__} should ' \\\n            f'contains all four operation type ' \\\n            f\"{['self_attn', 'norm', 'ffn', 'cross_attn']}\"\n\n        num_attn = operation_order.count('self_attn') + operation_order.count(\n            'cross_attn')\n        if isinstance(attn_cfgs, dict):\n            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]\n        else:\n            assert num_attn == len(attn_cfgs), f'The length ' \\\n                f'of attn_cfg {num_attn} is ' \\\n                f'not consistent with the number of attention' \\\n                f'in operation_order {operation_order}.'\n\n        self.num_attn = num_attn\n        self.operation_order = operation_order\n        self.norm_cfg = norm_cfg\n        self.pre_norm = operation_order[0] == 'norm'\n        self.attentions = ModuleList()\n\n        index = 0\n        for operation_name in operation_order:\n            if operation_name in ['self_attn', 'cross_attn']:\n                if 'batch_first' in attn_cfgs[index]:\n                    assert self.batch_first == attn_cfgs[index]['batch_first']\n                else:\n                    attn_cfgs[index]['batch_first'] = self.batch_first\n                attention = build_attention(attn_cfgs[index])\n                # Some custom attentions used as `self_attn`\n                # or `cross_attn` can have different behavior.\n                attention.operation_name = operation_name\n                self.attentions.append(attention)\n                index += 1\n\n        self.embed_dims = self.attentions[0].embed_dims\n\n        self.ffns = ModuleList()\n        num_ffns = operation_order.count('ffn')\n        if isinstance(ffn_cfgs, dict):\n            ffn_cfgs = ConfigDict(ffn_cfgs)\n        if isinstance(ffn_cfgs, dict):\n            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]\n        assert len(ffn_cfgs) == num_ffns\n        for ffn_index in range(num_ffns):\n            if 'embed_dims' not in ffn_cfgs[ffn_index]:\n                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims\n            else:\n                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims\n            self.ffns.append(\n                build_feedforward_network(ffn_cfgs[ffn_index],\n                                          dict(type='FFN')))\n\n        self.norms = ModuleList()\n        num_norms = operation_order.count('norm')\n        for _ in range(num_norms):\n            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])\n\n        self.use_checkpoint = with_cp\n\n    def _forward(self,\n                query,\n                query_pos,\n             
   mlvl_feats,\n                temp_memory, \n                temp_pos,\n                reference_points,\n                spatial_flatten,\n                level_start_index,\n                pc_range, \n                lidar2img, \n                img_metas,\n                attn_masks=None,\n                query_key_padding_mask=None,\n                key_padding_mask=None,\n                temp_reference_points=None,\n                cam_params=None,\n                debug_info=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoderLayer`.\n\n        **kwargs contains some specific arguments of attentions.\n\n        Args:\n            query (Tensor): The input query with shape\n                [num_queries, bs, embed_dims] if\n                self.batch_first is False, else\n                [bs, num_queries embed_dims].\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims] if self.batch_first is False, else\n                [bs, num_keys, embed_dims] .\n            value (Tensor): The value tensor with same shape as `key`.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`.\n                Default: None.\n            attn_masks (List[Tensor] | None): 2D Tensor used in\n                calculation of corresponding attention. The length of\n                it should equal to the number of `attention` in\n                `operation_order`. Default: None.\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_queries]. Only used in `self_attn` layer.\n                Defaults to None.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_keys]. 
Default: None.\n\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        norm_index = 0\n        attn_index = 0\n        ffn_index = 0\n        identity = query\n        if attn_masks is None:\n            attn_masks = [None for _ in range(self.num_attn)]\n        elif isinstance(attn_masks, torch.Tensor):\n            attn_masks = [\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\n            ]\n            warnings.warn(f'Use same attn_mask in all attentions in '\n                          f'{self.__class__.__name__} ')\n        else:\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\n                        f'attn_masks {len(attn_masks)} must be equal ' \\\n                        f'to the number of attention in ' \\\n                        f'operation_order {self.num_attn}'\n\n        for layer in self.operation_order:\n            if layer == 'self_attn':\n                if temp_memory is not None:\n                    temp_key = temp_value = torch.cat([query, temp_memory], dim=1)\n                    if query_pos is not None and temp_pos is not None: \n                        temp_pos = torch.cat([query_pos, temp_pos], dim=1)\n                    temp_reference_points = torch.cat([reference_points, temp_reference_points], dim=1)\n                else:\n                    temp_key = temp_value = query\n                    temp_pos = query_pos\n                    temp_reference_points = reference_points\n                query = self.attentions[attn_index](\n                    query,\n                    temp_key,\n                    temp_value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=temp_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=query_key_padding_mask,\n                    reference_points=reference_points,\n                    temp_reference_points=temp_reference_points,\n                    pc_range=pc_range,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'norm':\n                query = self.norms[norm_index](query)\n                norm_index += 1\n\n            elif layer == 'cross_attn':\n                query = self.attentions[attn_index](\n                    query,\n                    query_pos,\n                    mlvl_feats,\n                    reference_points,\n                    spatial_flatten,\n                    level_start_index,\n                    pc_range, \n                    lidar2img, \n                    img_metas,\n                    cam_params=cam_params,\n                    debug_info=debug_info,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'ffn':\n                query = self.ffns[ffn_index](\n                    query, identity if self.pre_norm else None)\n                ffn_index += 1\n\n        return query\n\n    def forward(self, \n                query,\n                query_pos,\n                mlvl_feats,\n                temp_memory, \n                temp_pos,\n                reference_points,\n                spatial_flatten,\n                level_start_index,\n                pc_range, \n                lidar2img, \n                img_metas,\n                attn_masks=None,\n                
query_key_padding_mask=None,\n                key_padding_mask=None,\n                temp_reference_points=None,\n                cam_params=None,\n                debug_info=None,\n                ):\n        \"\"\"Forward function for `TransformerCoder`.\n        Returns:\n            Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if self.use_checkpoint and self.training:\n            x = cp.checkpoint(\n                self._forward, \n                query,\n                query_pos,\n                mlvl_feats,\n                temp_memory, \n                temp_pos,\n                reference_points,\n                spatial_flatten,\n                level_start_index,\n                pc_range, \n                lidar2img, \n                img_metas,\n                attn_masks,\n                query_key_padding_mask,\n                key_padding_mask,\n                temp_reference_points,\n                cam_params,\n                debug_info\n                )\n        else:\n            x = self._forward(\n            query,\n            query_pos,\n            mlvl_feats,\n            temp_memory, \n            temp_pos,\n            reference_points,\n            spatial_flatten,\n            level_start_index,\n            pc_range, \n            lidar2img, \n            img_metas,\n            attn_masks,\n            query_key_padding_mask,\n            key_padding_mask,\n            temp_reference_points=temp_reference_points,\n            cam_params=cam_params,\n            debug_info=debug_info,\n        )\n        return x\n\n\n@ATTENTION.register_module()\nclass DeformableFeatureAggregationCuda(BaseModule):\n    def __init__(\n            self,\n            embed_dims=256,\n            num_groups=8,\n            num_levels=4,\n            num_cams=6,\n            dropout=0.1,\n            num_pts=13,\n            num_anchor_pts=1,\n            im2col_step=64,\n            batch_first=True,\n            code_size=3,\n            bias=1.,\n            ):\n        super(DeformableFeatureAggregationCuda, self).__init__()\n        self.embed_dims = embed_dims\n        self.num_groups = num_groups\n        self.num_anchor_pts = num_anchor_pts\n        self.group_dims = (self.embed_dims // self.num_groups)\n        self.num_levels = num_levels\n        self.num_cams = num_cams\n        self.num_pts = num_pts\n        self.code_size = code_size\n        self.weights_fc = nn.Linear(self.embed_dims, self.num_groups * self.num_levels * num_pts * self.num_anchor_pts)\n        self.output_proj = nn.Linear(self.embed_dims, self.embed_dims)\n        self.learnable_fc = nn.Linear(self.embed_dims, self.num_anchor_pts * num_pts * code_size)\n        # self.cam_embed = nn.Sequential(\n        #     nn.Linear(12, self.embed_dims // 2),\n        #     nn.ReLU(inplace=True),\n        #     nn.Linear(self.embed_dims // 2, self.embed_dims),\n        #     nn.ReLU(inplace=True),\n        #     nn.LayerNorm(self.embed_dims),\n        # )\n        self.drop = nn.Dropout(dropout)\n        self.im2col_step = im2col_step\n        self.bias = bias\n\n    def init_weight(self):\n        constant_init(self.weights_fc, val=0.0, bias=0.0)\n        xavier_init(self.output_proj, distribution=\"uniform\", bias=0.0)\n        nn.init.uniform_(self.learnable_fc.bias.data, -self.bias, self.bias)    \n\n    @force_fp32()\n    def forward(self, instance_feature, query_pos, feat_flatten, reference_points, spatial_flatten, level_start_index, pc_range, 
lidar2img_mat, img_metas, cam_params=None, debug_info=None):\n        bs, num_query = reference_points.shape[:2]\n        reference_points = get_ego_pos(reference_points, pc_range)\n        if reference_points.dim()==3 and self.num_anchor_pts==1:\n            key_points = reference_points.unsqueeze(-2) + self.learnable_fc(instance_feature).reshape(bs, num_query, -1, self.code_size)\n        elif reference_points.dim()==4 and self.num_anchor_pts==reference_points.size(2): # one query has more than 1 reference points\n            key_points = reference_points.unsqueeze(-2) + self.learnable_fc(instance_feature).reshape(bs, num_query, self.num_anchor_pts, -1, self.code_size)\n           \n            key_points = key_points.reshape(bs, num_query, self.num_anchor_pts * self.num_pts, self.code_size)\n        key_points = get_rel_pos(key_points, pc_range)\n        weights = self._get_weights(instance_feature, query_pos, lidar2img_mat)\n\n        features = self.feature_sampling(feat_flatten, spatial_flatten, level_start_index, key_points, weights, lidar2img_mat, img_metas)\n\n        output = self.output_proj(features)\n        output = self.drop(output) + instance_feature\n        return output\n\n    def _get_weights(self, instance_feature, anchor_embed, lidar2img_mat):\n        bs, num_query = instance_feature.shape[:2]\n        # lidar2img = lidar2img_mat[..., :3, :].flatten(-2)\n        # cam_embed = self.cam_embed(lidar2img) # B, N, C\n        if anchor_embed is not None:\n            feat_pos = (instance_feature + anchor_embed) # .unsqueeze(2)  # + cam_embed.unsqueeze(1)\n        else:\n            feat_pos = instance_feature\n\n        if self.num_anchor_pts==1:\n            weights = self.weights_fc(feat_pos).reshape(bs, num_query, self.num_groups, -1).softmax(dim=-1)\n            weights = weights.reshape(bs, num_query, self.num_groups, self.num_levels, self.num_pts).contiguous()\n        else:\n            weights = self.weights_fc(feat_pos).reshape(bs, num_query, self.num_groups, self.num_anchor_pts, -1).softmax(dim=-1) / self.num_anchor_pts\n            weights = weights.reshape(bs, num_query, self.num_groups, self.num_anchor_pts, self.num_levels, self.num_pts)\n            weights = weights.permute(0, 1, 2, 4, 3, 5).flatten(-2).contiguous()\n\n        return weights\n\n    def feature_sampling(self, feat_flatten, spatial_flatten, level_start_index, key_points, weights, lidar2img_mat, img_metas):\n        bs, num_query, _ = key_points.shape[:3]\n\n        # pts_extand = torch.cat([key_points, torch.ones_like(key_points[..., :1])], dim=-1)\n        # points_2d = torch.matmul(lidar2img_mat[:, :, None, None], pts_extand[:, None, ..., None]).squeeze(-1)\n\n        # points_2d = points_2d[..., :2] / torch.clamp(points_2d[..., 2:3], min=1e-5)\n        # points_2d[..., 0:1] = points_2d[..., 0:1] / img_metas[0]['pad_shape'][0][1]\n        # points_2d[..., 1:2] = points_2d[..., 1:2] / img_metas[0]['pad_shape'][0][0]\n\n        # points_2d = points_2d.flatten(end_dim=1) #[b*6, 900, 13, 2]\n        # points_2d = points_2d[:, :, None, None, :, :].repeat(1, 1, self.num_groups, self.num_levels, 1, 1)\n\n        points_2d = key_points[..., :2]\n        points_2d = points_2d[:, :, None, None, :, :].repeat(1, 1, self.num_groups, self.num_levels, 1, 1)\n\n        bn, num_value, _ = feat_flatten.size()\n        feat_flatten = feat_flatten.reshape(bn, num_value, self.num_groups, -1)\n        # attention_weights = weights * mask\n        with autocast(enabled=False):\n            output = 
MultiScaleDeformableAttnFunction.apply(\n                feat_flatten, spatial_flatten, level_start_index, points_2d,\n                weights, self.im2col_step)\n        \n        output = output.reshape(bs, num_query, -1)\n\n        return output\n\n\n@ATTENTION.register_module()\nclass DeformableFeatureAggregationCuda_v2(BaseModule):\n    def __init__(\n            self,\n            embed_dims=256,\n            num_groups=8,\n            num_levels=4,\n            num_cams=6,\n            dropout=0.1,\n            num_pts=13,\n            num_anchor_pts=1,\n            im2col_step=64,\n            batch_first=True,\n           \n            bias=1.,\n            ):\n        super(DeformableFeatureAggregationCuda_v2, self).__init__()\n        self.embed_dims = embed_dims\n        self.num_groups = num_groups\n        self.num_anchor_pts = num_anchor_pts\n        self.group_dims = (self.embed_dims // self.num_groups)\n        self.num_levels = num_levels\n        self.num_cams = num_cams\n        self.num_pts = num_pts\n        self.weights_fc = nn.Linear(self.embed_dims, self.num_groups * self.num_levels * num_pts * self.num_anchor_pts)\n        self.output_proj = nn.Linear(self.embed_dims, self.embed_dims)\n        self.learnable_fc = nn.Linear(self.embed_dims, self.num_anchor_pts * num_pts * 3)\n        # self.cam_embed = nn.Sequential(\n        #     nn.Linear(12, self.embed_dims // 2),\n        #     nn.ReLU(inplace=True),\n        #     nn.Linear(self.embed_dims // 2, self.embed_dims),\n        #     nn.ReLU(inplace=True),\n        #     nn.LayerNorm(self.embed_dims),\n        # )\n        self.drop = nn.Dropout(dropout)\n        self.im2col_step = im2col_step\n        self.bias = bias\n\n    def init_weight(self):\n        constant_init(self.weights_fc, val=0.0, bias=0.0)\n        xavier_init(self.output_proj, distribution=\"uniform\", bias=0.0)\n        nn.init.uniform_(self.learnable_fc.bias.data, -self.bias, self.bias)    \n\n    @force_fp32()\n    def forward(self, instance_feature, query_pos, feat_flatten, reference_points, spatial_flatten, level_start_index, pc_range, lidar2img_mat, img_metas, cam_params=None, debug_info=None):\n        bs, num_query = reference_points.shape[:2]\n        reference_points = get_ego_pos(reference_points, pc_range)\n        if reference_points.dim()==3 and self.num_anchor_pts==1:\n            key_points = reference_points.unsqueeze(-2) + self.learnable_fc(instance_feature+query_pos).reshape(bs, num_query, -1, 3)\n        elif reference_points.dim()==4 and self.num_anchor_pts==reference_points.size(2): # one query has more than 1 reference points\n            key_points = reference_points.unsqueeze(-2) + self.learnable_fc(instance_feature+query_pos).reshape(bs, num_query, self.num_anchor_pts, -1, 3)\n            key_points = key_points.reshape(bs, num_query, self.num_anchor_pts * self.num_pts, 3)\n        key_points = get_rel_pos(key_points, pc_range)\n        weights = self._get_weights(instance_feature, query_pos, lidar2img_mat)\n\n        features = self.feature_sampling(feat_flatten, spatial_flatten, level_start_index, key_points, weights, lidar2img_mat, img_metas)\n\n        output = self.output_proj(features)\n        output = self.drop(output) + instance_feature\n        return output\n\n    def _get_weights(self, instance_feature, anchor_embed, lidar2img_mat):\n        bs, num_query = instance_feature.shape[:2]\n        # lidar2img = lidar2img_mat[..., :3, :].flatten(-2)\n        # cam_embed = self.cam_embed(lidar2img) # B, N, C\n        
feat_pos = instance_feature + anchor_embed # .unsqueeze(2)  # + cam_embed.unsqueeze(1)\n        if self.num_anchor_pts==1:\n            weights = self.weights_fc(feat_pos).reshape(bs, num_query, self.num_groups, -1).softmax(dim=-1)\n            weights = weights.reshape(bs, num_query, self.num_groups, self.num_levels, self.num_pts).contiguous()\n        else:\n            weights = self.weights_fc(feat_pos).reshape(bs, num_query, self.num_groups, self.num_anchor_pts, -1).softmax(dim=-1) / self.num_anchor_pts\n            weights = weights.reshape(bs, num_query, self.num_groups, self.num_anchor_pts, self.num_levels, self.num_pts)\n            weights = weights.permute(0, 1, 2, 4, 3, 5).flatten(-2).contiguous()\n\n        return weights\n\n    def feature_sampling(self, feat_flatten, spatial_flatten, level_start_index, key_points, weights, lidar2img_mat, img_metas):\n        bs, num_query, _ = key_points.shape[:3]\n\n        # pts_extand = torch.cat([key_points, torch.ones_like(key_points[..., :1])], dim=-1)\n        # points_2d = torch.matmul(lidar2img_mat[:, :, None, None], pts_extand[:, None, ..., None]).squeeze(-1)\n\n        # points_2d = points_2d[..., :2] / torch.clamp(points_2d[..., 2:3], min=1e-5)\n        # points_2d[..., 0:1] = points_2d[..., 0:1] / img_metas[0]['pad_shape'][0][1]\n        # points_2d[..., 1:2] = points_2d[..., 1:2] / img_metas[0]['pad_shape'][0][0]\n\n        # points_2d = points_2d.flatten(end_dim=1) #[b*6, 900, 13, 2]\n        # points_2d = points_2d[:, :, None, None, :, :].repeat(1, 1, self.num_groups, self.num_levels, 1, 1)\n\n        points_2d = key_points[..., :2]\n        points_2d = points_2d[:, :, None, None, :, :].repeat(1, 1, self.num_groups, self.num_levels, 1, 1)\n\n        bn, num_value, _ = feat_flatten.size()\n        feat_flatten = feat_flatten.reshape(bn, num_value, self.num_groups, -1)\n        # attention_weights = weights * mask\n        with autocast(enabled=False):\n            output = MultiScaleDeformableAttnFunction.apply(\n                feat_flatten, spatial_flatten, level_start_index, points_2d,\n                weights, self.im2col_step)\n        \n        output = output.reshape(bs, num_query, -1)\n\n        return output\n\n\n\n\n@ATTENTION.register_module()\nclass MVDeformableFeatureAggregationCuda(BaseModule):\n    def __init__(\n            self,\n            embed_dims=256,\n            num_groups=8,\n            num_levels=4,\n            num_cams=6,\n            dropout=0.1,\n            num_pts=13,\n            im2col_step=64,\n            batch_first=True,\n            bias=1.,\n            ):\n        super(MVDeformableFeatureAggregationCuda, self).__init__()\n        self.embed_dims = embed_dims\n        self.num_groups = num_groups\n        self.group_dims = (self.embed_dims // self.num_groups)\n        self.num_levels = num_levels\n        self.num_cams = num_cams\n        self.weights_fc = nn.Linear(self.embed_dims, self.num_groups * self.num_levels * num_pts)\n        self.output_proj = nn.Linear(self.embed_dims, self.embed_dims)\n        self.learnable_fc = nn.Linear(self.embed_dims, num_pts * 3)\n        self.cam_embed = nn.Sequential(\n            nn.Linear(26, self.embed_dims // 2),\n            nn.ReLU(inplace=True),\n            nn.Linear(self.embed_dims // 2, self.embed_dims),\n            nn.ReLU(inplace=True),\n            nn.LayerNorm(self.embed_dims),\n        )\n        self.drop = nn.Dropout(dropout)\n        self.im2col_step = im2col_step\n        self.bias = bias\n\n    def init_weight(self):\n        
constant_init(self.weights_fc, val=0.0, bias=0.0)\n        xavier_init(self.output_proj, distribution=\"uniform\", bias=0.0)\n        nn.init.uniform_(self.learnable_fc.bias.data, -self.bias, self.bias)    \n\n    def forward(self, instance_feature, query_pos, feat_flatten, reference_points, spatial_flatten, level_start_index, pc_range, lidar2img_mat, img_metas, cam_params=None, debug_info=None):\n        bs, num_anchor = reference_points.shape[:2]\n        reference_points = get_ego_pos(reference_points, pc_range)\n        key_points = reference_points.unsqueeze(-2) + self.learnable_fc(instance_feature).reshape(bs, num_anchor, -1, 3)\n\n        weights = self._get_weights(instance_feature, query_pos, lidar2img_mat, cam_params)\n\n        features = self.feature_sampling(feat_flatten, spatial_flatten, level_start_index, key_points, weights, lidar2img_mat, img_metas, cam_params=cam_params, debug_info=debug_info)\n\n        output = self.output_proj(features)\n        output = self.drop(output) + instance_feature\n        return output\n\n    def _get_weights(self, instance_feature, anchor_embed, lidar2img_mat, cam_params=None):\n        bs, num_anchor = instance_feature.shape[:2]\n        # lidar2img = lidar2img_mat[..., :3, :].flatten(-2)\n\n        rots, trans, intrins, post_rots, post_trans, bda = cam_params\n        mln_input = torch.cat([intrins[..., 0, 0:1], intrins[..., 1,1:2], rots.flatten(-2), trans, post_rots.flatten(-2), post_trans], dim=-1)\n        \n\n        cam_embed = self.cam_embed(mln_input) # B, N, C\n        feat_pos = (instance_feature + anchor_embed).unsqueeze(2)  + cam_embed.unsqueeze(1)\n        weights = self.weights_fc(feat_pos).reshape(bs, num_anchor, -1, self.num_groups).softmax(dim=-2)\n        weights = weights.reshape(bs, num_anchor, self.num_cams, -1, self.num_groups).permute(0, 2, 1, 4, 3).contiguous()\n        return weights.flatten(end_dim=1)\n\n    @force_fp32(apply_to=('feat_flatten', 'key_points'))\n    def feature_sampling(self, feat_flatten, spatial_flatten, level_start_index, key_points, weights, lidar2img_mat, img_metas, cam_params=None, debug_info=None):\n        bs, num_anchor, _ = key_points.shape[:3]\n\n        rots, trans, intrins, post_rots, post_trans, bda = cam_params\n        B, N, _ = trans.shape\n        eps = 1e-5\n        ogfH, ogfW = img_metas[0]['input_size']\n        reference_points = key_points\n\n        # reference_points = debug_info['centers3d'][0][:, :3][None, :, None, :].to(rots.device)\n        with autocast(enabled=False):\n            reference_points = reference_points[:, None].repeat(1, N, 1, 1, 1)\n            reference_points = torch.inverse(bda).view(B, 1, 1, 1, 3,\n                          3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\n            reference_points -= trans.view(B, N, 1, 1, 3)\n            combine = rots.matmul(torch.inverse(intrins)).inverse()\n            points_2d = combine.view(B, N, 1, 1, 3, 3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\n            points_2d = torch.cat([points_2d[..., 0:2] / torch.maximum(\n                points_2d[..., 2:3], torch.ones_like(points_2d[..., 2:3])*eps),  points_2d[..., 2:3]], 4\n                )\n            points_2d = post_rots.view(B, N, 1, 1, 3, 3).matmul(points_2d.unsqueeze(-1)).squeeze(-1)\n            points_2d += post_trans.view(B, N, 1, 1, 3) \n            \n            # imgs = debug_info['img'][0]\n            # import cv2\n            # from IPython import embed\n            # embed()\n            # exit()\n            # for i in 
range(6):\n            #     img2 = imgs[i].permute(1, 2, 0).cpu().numpy().astype(np.float32)\n            #     img = np.ones([320, 800, 3], dtype=np.float32) * 255\n            #     img = img.astype(np.uint8)\n            #     for corner in points_2d[0][i]:\n            #         corner = corner[0]\n            #         if (0<corner[0]<ogfW) & (0<corner[1]<ogfH):\n                        \n            #             corner = corner.cpu().numpy()[:2].astype(np.int)\n            #             print(corner)\n            #             img = cv2.circle(img, corner, 2, (61, 102, 255))\n            #     img = np.concatenate((img2, img), axis=0)\n            #     cv2.imwrite(f'{i}=.png', img[:, :,::-1])\n            points_2d[..., 0] /= ogfW\n            points_2d[..., 1] /= ogfH\n\n            points_2d = points_2d.flatten(end_dim=1) #[b*6, 900, 13, 2]\n            points_2d = points_2d[:, :, None, None, :, :].repeat(1, 1, self.num_groups, self.num_levels, 1, 1)\n\n            bn, num_value, _ = feat_flatten.size()\n            feat_flatten = feat_flatten.reshape(bn, num_value, self.num_groups, -1)\n            # attention_weights = weights * mask\n            output = MultiScaleDeformableAttnFunction.apply(\n                    feat_flatten, spatial_flatten, level_start_index, points_2d,\n                    weights, self.im2col_step)\n        \n            output = output.reshape(bs, self.num_cams, num_anchor, -1)\n\n        return output.sum(1)\n\n\nfrom mmcv.cnn.bricks.transformer import MultiheadAttention\n@ATTENTION.register_module()\nclass SparseBEVSelfAttention(BaseModule):\n    def __init__(self, embed_dims=256, num_heads=8, dropout=0.1, pc_range=[], init_cfg=None, batch_first=True, **kwargs):\n        super().__init__(init_cfg)\n        self.pc_range = pc_range\n        self.embed_dims = embed_dims\n        self.attention = MultiheadAttention(embed_dims, num_heads, dropout, batch_first=batch_first)\n        self.gen_tau = nn.Linear(embed_dims, num_heads)\n\n    @torch.no_grad()\n    def init_weights(self):\n        nn.init.zeros_(self.gen_tau.weight)\n        nn.init.uniform_(self.gen_tau.bias, 0.0, 2.0)\n\n    def forward(self,\n                    query,\n                    temp_key,\n                    temp_value,\n                    identity,\n                    query_pos=None,\n                    key_pos=None,\n                    attn_mask=None,\n                    key_padding_mask=None,\n                    reference_points=None,\n                    temp_reference_points=None,\n                    pc_range=None,\n                    **kwargs):\n        dist = self.calc_points_dists(reference_points, temp_reference_points, pc_range)\n        tau = self.gen_tau(query)  # [B, Q, 8]\n        tau = tau.permute(0, 2, 1)  # [B, 8, Q]\n        dist_attn_mask = dist[:, None, :, :] * tau[..., None]  # [B, 8, Q, Q]\n        if attn_mask is not None:\n            dist_attn_mask[:, :, attn_mask] = float('-inf')\n        dist_attn_mask = dist_attn_mask.flatten(0, 1)  # [Bx8, Q, Q]\n\n        return self.attention(query,\n                    temp_key,\n                    temp_value,\n                    identity,\n                    query_pos,\n                    key_pos,\n                    dist_attn_mask,)\n\n    @torch.no_grad()\n    def calc_points_dists(self, reference_points, temp_reference_points, pc_range):\n    \n        reference_points = get_ego_pos(reference_points, pc_range)[..., :2] \n        temp_reference_points = get_ego_pos(temp_reference_points, 
pc_range)[..., :2]\n        dist = []\n        for b in range(reference_points.shape[0]):\n            dist_b = torch.norm(reference_points[b].reshape(-1, 1, 2) - temp_reference_points[b].reshape(1, -1, 2), dim=-1)\n            dist.append(dist_b[None, ...])\n\n        dist = torch.cat(dist, dim=0)  # [B, Q, Q]\n        dist = -dist\n\n        return dist\n\n\nfrom mmcv.cnn.bricks.transformer import MultiheadAttention\n@ATTENTION.register_module()\nclass MotionSelfAttention(BaseModule):\n    def __init__(self, embed_dims=256, num_heads=8, dropout=0.1, pc_range=[], init_cfg=None, batch_first=True, dist_func_type='ADE', consider_map_quality=True, **kwargs):\n        super().__init__(init_cfg)\n        self.pc_range = pc_range\n        self.embed_dims = embed_dims\n        self.attention = MultiheadAttention(embed_dims, num_heads, dropout, batch_first=batch_first)\n        self.gen_tau = nn.Linear(embed_dims, num_heads)\n        self.dist_func_type = dist_func_type\n        self.consider_map_quality = consider_map_quality\n        if self.consider_map_quality and dist_func_type != 'ADE':\n            self.map_alpha = nn.Parameter(\n                torch.tensor([0.5]), requires_grad=False\n            )\n\n    @torch.no_grad()\n    def init_weights(self):\n        nn.init.zeros_(self.gen_tau.weight)\n        nn.init.uniform_(self.gen_tau.bias, 0.0, 2.0)\n\n    def forward(self,\n                    query,\n                    key,\n                    value,\n                    identity,\n                    query_pos=None,\n                    key_pos=None,\n                    attn_mask=None,\n                    key_padding_mask=None,\n                    reference_points_q=None,\n                    reference_points_v=None,\n                    pc_range=None,\n                    map_scores=None,\n                    **kwargs):\n        \n        func_mapper = {\n            'ADE': self.calc_ADE,\n            'MDE': self.calc_MDE,\n            'MDE_v2': self.calc_MDE_v2\n        }\n        dist_func = func_mapper[self.dist_func_type]\n        dist = dist_func(reference_points_q, reference_points_v, pc_range, map_scores=map_scores)\n        tau = self.gen_tau(query)  # [B, Q, 8]\n        tau = tau.permute(0, 2, 1)  # [B, 8, Q]\n        dist_attn_mask = dist[:, None, :, :] * tau[..., None]  # [B, 8, Q, Q]\n        if attn_mask is not None:\n            dist_attn_mask[:, :, attn_mask] = float('-inf')\n        dist_attn_mask = dist_attn_mask.flatten(0, 1)  # [Bx8, Q, Q]\n\n        return self.attention(query,\n                    key,\n                    value,\n                    identity,\n                    query_pos,\n                    key_pos,\n                    dist_attn_mask,)\n\n    @torch.no_grad()\n    def calc_ADE(self, reference_points_q, reference_points_v, pc_range, **kwargs):\n        \"\"\"average distance\"\"\"\n        dist = []\n        code_size = reference_points_q.size(-1)\n        n_points = reference_points_q.size(-2)\n        for b in range(reference_points_q.shape[0]):\n            dist_b = torch.norm(reference_points_q[b].reshape(-1, 1, n_points, code_size) - reference_points_v[b].reshape(1, -1, n_points, code_size), dim=-1)\n            dist.append(dist_b[None, ...].mean(-1))\n        dist = torch.cat(dist, dim=0)  # [B, Q, K]\n        dist = -dist\n        return dist\n\n\n    @torch.no_grad()\n    def calc_MDE(self, reference_points_q, reference_points_v, pc_range, map_scores=None):\n        \"\"\"\n        Minimum distance between the map lane and the trajectory: for each trajectory point, take the minimum distance to the lane points, then sum over the trajectory.\n        \"\"\"
\n        reference_points_q = reference_points_q[..., :2]\n        q_shape = reference_points_q.shape\n        v_shape = reference_points_v.shape\n        reference_points_q = reference_points_q.flatten(1, 2)\n        reference_points_v = reference_points_v.flatten(1, 2)\n        \n        dist = []\n        code_size = reference_points_q.size(-1)\n        for b in range(reference_points_q.shape[0]):\n            dist_b = torch.norm(reference_points_q[b].reshape(-1, 1, code_size) - reference_points_v[b].reshape(1, -1, code_size), dim=-1)\n            dist.append(dist_b[None, ...])\n        dist = torch.cat(dist, dim=0)  # [B, Q, K]\n        dist = dist.view(q_shape[0], q_shape[1], q_shape[2], v_shape[1], v_shape[2])\n        dist = dist.min(-1).values.sum(2)\n        \n        if self.consider_map_quality and map_scores is not None:\n            map_scores = map_scores.sigmoid().max(-1)[0] # smaller, better\n            map_scores = torch.round(1-map_scores, decimals=1) + self.map_alpha\n            dist = dist * map_scores.unsqueeze(1)\n            \n        dist = -dist\n\n        return dist\n\n    @torch.no_grad()\n    def calc_MDE_v2(self, reference_points_q, reference_points_v, pc_range, map_scores=None):\n        \"\"\"\n        Minimum distance between the map lane and the trajectory, averaged over trajectory points; pairs with large distance or low map confidence are masked out.\n        \"\"\"\n        reference_points_q = reference_points_q[..., :2]\n        q_shape = reference_points_q.shape\n        v_shape = reference_points_v.shape\n        reference_points_q = reference_points_q.flatten(1, 2)\n        reference_points_v = reference_points_v.flatten(1, 2)\n        \n        dist = []\n        code_size = reference_points_q.size(-1)\n        for b in range(reference_points_q.shape[0]):\n            dist_b = torch.norm(reference_points_q[b].reshape(-1, 1, code_size) - reference_points_v[b].reshape(1, -1, code_size), dim=-1)\n            dist.append(dist_b[None, ...])\n        dist = torch.cat(dist, dim=0)  # [B, Q, K]\n        dist = dist.view(q_shape[0], q_shape[1], q_shape[2], v_shape[1], v_shape[2])\n        dist = dist.min(-1).values.mean(2)\n        dist[dist>5] = 1000\n        \n        if self.consider_map_quality and map_scores is not None:\n            map_scores = map_scores.sigmoid().max(-1)[0] # smaller, better\n            # map_scores = torch.round(1-map_scores, decimals=1) + self.map_alpha\n            dist[map_scores.unsqueeze(1)<0.2] = 1000\n        dist = -dist\n        return dist\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass CustomTransformerDecoder(TransformerLayerSequence):\n    \"\"\"Implements the decoder in DETR3D transformer.\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. 
Default: `LN`.\n    \"\"\"\n\n    def __init__(self, *args, return_intermediate=False, **kwargs):\n        super(CustomTransformerDecoder, self).__init__(*args, **kwargs)\n        self.return_intermediate = return_intermediate\n        self.fp16_enabled = False\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                query_pos=None,\n                key_pos=None,\n                attn_masks=None,\n                key_padding_mask=None,\n                *args,\n                **kwargs):\n        \"\"\"Forward function for `Detr3DTransformerDecoder`.\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        intermediate = []\n        for lid, layer in enumerate(self.layers):\n            query = layer(\n                query=query,\n                key=key,\n                value=value,\n                query_pos=query_pos,\n                key_pos=key_pos,\n                attn_masks=attn_masks,\n                key_padding_mask=key_padding_mask,\n                *args,\n                **kwargs)\n\n            if self.return_intermediate:\n                intermediate.append(query)\n\n        if self.return_intermediate:\n            return torch.stack(intermediate)\n\n        return query"
  },
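  {
    "path": "examples/dist_attn_bias_sketch.py",
    "content": "# Hypothetical, self-contained sketch; this file, its path, and the tensor sizes are editorial assumptions, not part of the upstream code.\n# It illustrates the distance-scaled attention bias used by SparseBEVSelfAttention / MotionSelfAttention above:\n# pairwise query distances are negated and multiplied by a per-head, per-query scale tau, then passed to\n# multi-head attention as an additive mask, so nearby queries attend to each other more strongly.\nimport torch\nimport torch.nn as nn\n\nB, Q, C, num_heads = 2, 6, 256, 8\nquery = torch.randn(B, Q, C)\nreference_points = torch.rand(B, Q, 2) * 100.0  # BEV (x, y) positions, illustrative scale\n\n# Same init as SparseBEVSelfAttention.init_weights: zero weight, positive bias.\ngen_tau = nn.Linear(C, num_heads)\nnn.init.zeros_(gen_tau.weight)\nnn.init.uniform_(gen_tau.bias, 0.0, 2.0)\n\ndist = -torch.cdist(reference_points, reference_points)   # [B, Q, Q], closer pairs -> values nearer 0\ntau = gen_tau(query).permute(0, 2, 1)                      # [B, heads, Q]\nattn_bias = dist[:, None, :, :] * tau[..., None]           # [B, heads, Q, Q]\nattn_bias = attn_bias.flatten(0, 1)                        # [B * heads, Q, Q]\n\n# Plain torch attention stands in for the mmcv MultiheadAttention wrapper used in the model.\nattn = nn.MultiheadAttention(C, num_heads, batch_first=True)\nout, _ = attn(query, query, query, attn_mask=attn_bias)\nprint(out.shape)  # torch.Size([2, 6, 256])\n"
  },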
  {
    "path": "mmdet3d/models/fbbev/streampetr/streampetr_utils.py",
    "content": "import torch\n\ndef normalize_bbox(bboxes, pc_range):\n    cx = bboxes[..., 0:1]\n    cy = bboxes[..., 1:2]\n    cz = bboxes[..., 2:3]\n    w = bboxes[..., 3:4].log()\n    l = bboxes[..., 4:5].log()\n    h = bboxes[..., 5:6].log()\n\n    rot = bboxes[..., 6:7]\n    if bboxes.size(-1) > 7:\n        vx = bboxes[..., 7:8] \n        vy = bboxes[..., 8:9]\n        normalized_bboxes = torch.cat(\n            (cx, cy, cz, w, l, h, rot.sin(), rot.cos(), vx, vy), dim=-1\n        )\n    else:\n        normalized_bboxes = torch.cat(\n            (cx, cy, cz, w, l, h, rot.sin(), rot.cos()), dim=-1\n        )\n    return normalized_bboxes\n\n# ------------------------------------------------------------------------\n# Copyright (c) 2022 megvii-model. All Rights Reserved.\n# ------------------------------------------------------------------------\n# Modified from mmdetection (https://github.com/open-mmlab/mmdetection)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\n#  Modified by Shihao Wang\n# ------------------------------------------------------------------------\nimport math\nimport torch\nimport torch.nn as nn \nimport numpy as np\n\ndef denormalize_bbox(normalized_bboxes, pc_range):\n    # rotation \n    rot_sine = normalized_bboxes[..., 6:7]\n\n    rot_cosine = normalized_bboxes[..., 7:8]\n    rot = torch.atan2(rot_sine, rot_cosine)\n\n    # center in the bev\n    cx = normalized_bboxes[..., 0:1]\n    cy = normalized_bboxes[..., 1:2]\n    cz = normalized_bboxes[..., 2:3]\n\n    # size\n    w = normalized_bboxes[..., 3:4]\n    l = normalized_bboxes[..., 4:5]\n    h = normalized_bboxes[..., 5:6]\n\n    w = w.exp() \n    l = l.exp() \n    h = h.exp() \n    if normalized_bboxes.size(-1) > 8:\n         # velocity \n        vx = normalized_bboxes[:, 8:9]\n        vy = normalized_bboxes[:, 9:10]\n        denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)\n    else:\n        denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)\n    return denormalized_bboxes\n    \ndef pos2posemb3d(pos, num_pos_feats=128, temperature=10000):\n    scale = 2 * math.pi\n    pos = pos * scale\n    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device)\n    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode='floor') / num_pos_feats)\n    pos_x = pos[..., 0, None] / dim_t\n    pos_y = pos[..., 1, None] / dim_t\n    pos_z = pos[..., 2, None] / dim_t\n    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2)\n    pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2)\n    pos_z = torch.stack((pos_z[..., 0::2].sin(), pos_z[..., 1::2].cos()), dim=-1).flatten(-2)\n    posemb = torch.cat((pos_y, pos_x, pos_z), dim=-1)\n    return posemb\n\ndef bevpos2posemb(pos, num_pos_feats=128, temperature=10000):\n    scale = 2 * math.pi\n    pos = pos * scale\n    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device)\n    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode='floor') / num_pos_feats)\n    pos_x = pos[..., 0, None] / dim_t\n    pos_y = pos[..., 1, None] / dim_t\n    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2)\n    pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2)\n    posemb = torch.cat((pos_y, pos_x), dim=-1)\n    return posemb\n\ndef pos2posemb1d(pos, num_pos_feats=256, 
temperature=10000):\n    scale = 2 * math.pi\n    pos = pos * scale\n    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device)\n    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode='floor') / num_pos_feats)\n    pos_x = pos[..., 0, None] / dim_t\n\n    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2)\n\n    return pos_x\n\ndef nerf_positional_encoding(\n    tensor, num_encoding_functions=6, include_input=False, log_sampling=True\n) -> torch.Tensor:\n    r\"\"\"Apply positional encoding to the input.\n    Args:\n        tensor (torch.Tensor): Input tensor to be positionally encoded.\n        encoding_size (optional, int): Number of encoding functions used to compute\n            a positional encoding (default: 6).\n        include_input (optional, bool): Whether or not to include the input in the\n            positional encoding (default: True).\n    Returns:\n    (torch.Tensor): Positional encoding of the input tensor.\n    \"\"\"\n    # TESTED\n    # Trivially, the input tensor is added to the positional encoding.\n    encoding = [tensor] if include_input else []\n    frequency_bands = None\n    if log_sampling:\n        frequency_bands = 2.0 ** torch.linspace(\n            0.0,\n            num_encoding_functions - 1,\n            num_encoding_functions,\n            dtype=tensor.dtype,\n            device=tensor.device,\n        )\n    else:\n        frequency_bands = torch.linspace(\n            2.0 ** 0.0,\n            2.0 ** (num_encoding_functions - 1),\n            num_encoding_functions,\n            dtype=tensor.dtype,\n            device=tensor.device,\n        )\n\n    for freq in frequency_bands:\n        for func in [torch.sin, torch.cos]:\n            encoding.append(func(tensor * freq))\n\n    # Special case, for no positional encoding\n    if len(encoding) == 1:\n        return encoding[0]\n    else:\n        return torch.cat(encoding, dim=-1)\n\n\nimport torch\nimport torch.nn as nn\nimport numpy as np\nfrom mmdet.core import bbox_xyxy_to_cxcywh\nfrom mmdet.models.utils.transformer import inverse_sigmoid\n\ndef memory_refresh(memory, prev_exist):\n    memory_shape = memory.shape\n    view_shape = [1 for _ in range(len(memory_shape))]\n    prev_exist = prev_exist.view(-1, *view_shape[1:]) \n    return memory * prev_exist\n    \ndef topk_gather(feat, topk_indexes):\n    if topk_indexes is not None:\n        feat_shape = feat.shape\n        topk_shape = topk_indexes.shape\n        \n        view_shape = [1 for _ in range(len(feat_shape))] \n        view_shape[:2] = topk_shape[:2]\n        topk_indexes = topk_indexes.view(*view_shape)\n        \n        feat = torch.gather(feat, 1, topk_indexes.repeat(1, 1, *feat_shape[2:]))\n    return feat\n\n\ndef apply_ltrb(locations, pred_ltrb): \n        \"\"\"\n        :param locations:  (1, H, W, 2)\n        :param pred_ltrb:  (N, H, W, 4) \n        \"\"\"\n        pred_boxes = torch.zeros_like(pred_ltrb)\n        pred_boxes[..., 0] = (locations[..., 0] - pred_ltrb[..., 0])# x1\n        pred_boxes[..., 1] = (locations[..., 1] - pred_ltrb[..., 1])# y1\n        pred_boxes[..., 2] = (locations[..., 0] + pred_ltrb[..., 2])# x2\n        pred_boxes[..., 3] = (locations[..., 1] + pred_ltrb[..., 3])# y2\n        min_xy = pred_boxes[..., 0].new_tensor(0)\n        max_xy = pred_boxes[..., 0].new_tensor(1)\n        pred_boxes  = torch.where(pred_boxes < min_xy, min_xy, pred_boxes)\n        pred_boxes  = torch.where(pred_boxes > max_xy, max_xy, pred_boxes)\n        
pred_boxes = bbox_xyxy_to_cxcywh(pred_boxes)\n\n\n        return pred_boxes    \n\ndef apply_center_offset(locations, center_offset): \n        \"\"\"\n        :param locations:  (1, H, W, 2)\n        :param pred_ltrb:  (N, H, W, 4) \n        \"\"\"\n        centers_2d = torch.zeros_like(center_offset)\n        locations = inverse_sigmoid(locations)\n        centers_2d[..., 0] = locations[..., 0] + center_offset[..., 0]  # x1\n        centers_2d[..., 1] = locations[..., 1] + center_offset[..., 1]  # y1\n        centers_2d = centers_2d.sigmoid()\n\n        return centers_2d\n\n@torch.no_grad()\ndef locations(features, stride, pad_h, pad_w):\n        \"\"\"\n        Arguments:\n            features:  (N, C, H, W)\n        Return:\n            locations:  (H, W, 2)\n        \"\"\"\n\n        h, w = features.size()[-2:]\n        device = features.device\n        \n        shifts_x = (torch.arange(\n            0, stride*w, step=stride,\n            dtype=torch.float32, device=device\n        ) + stride // 2 ) / pad_w\n        shifts_y = (torch.arange(\n            0, h * stride, step=stride,\n            dtype=torch.float32, device=device\n        ) + stride // 2) / pad_h\n        shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)\n        shift_x = shift_x.reshape(-1)\n        shift_y = shift_y.reshape(-1)\n        locations = torch.stack((shift_x, shift_y), dim=1)\n        \n        locations = locations.reshape(h, w, 2)\n        \n        return locations\n\n\n\ndef gaussian_2d(shape, sigma=1.0):\n    \"\"\"Generate gaussian map.\n\n    Args:\n        shape (list[int]): Shape of the map.\n        sigma (float, optional): Sigma to generate gaussian map.\n            Defaults to 1.\n\n    Returns:\n        np.ndarray: Generated gaussian map.\n    \"\"\"\n    m, n = [(ss - 1.) / 2. for ss in shape]\n    y, x = np.ogrid[-m:m + 1, -n:n + 1]\n\n    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))\n    h[h < np.finfo(h.dtype).eps * h.max()] = 0\n    return h\n\n\ndef draw_heatmap_gaussian(heatmap, center, radius, k=1):\n    \"\"\"Get gaussian masked heatmap.\n\n    Args:\n        heatmap (torch.Tensor): Heatmap to be masked.\n        center (torch.Tensor): Center coord of the heatmap.\n        radius (int): Radius of gaussian.\n        K (int, optional): Multiple of masked_gaussian. 
Defaults to 1.\n\n    Returns:\n        torch.Tensor: Masked heatmap.\n    \"\"\"\n    diameter = 2 * radius + 1\n    gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)\n\n    x, y = int(center[0]), int(center[1])\n\n    height, width = heatmap.shape[0:2]\n\n    left, right = min(x, radius), min(width - x, radius + 1)\n    top, bottom = min(y, radius), min(height - y, radius + 1)\n\n    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]\n    masked_gaussian = torch.from_numpy(\n        gaussian[radius - top:radius + bottom,\n                 radius - left:radius + right]).to(heatmap.device,\n                                                   torch.float32)\n    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:\n        torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)\n    return heatmap\n\nclass SELayer_Linear(nn.Module):\n    def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):\n        super().__init__()\n        self.conv_reduce = nn.Linear(channels, channels)\n        self.act1 = act_layer()\n        self.conv_expand = nn.Linear(channels, channels)\n        self.gate = gate_layer()\n\n    def forward(self, x, x_se):\n        x_se = self.conv_reduce(x_se)\n        x_se = self.act1(x_se)\n        x_se = self.conv_expand(x_se)\n        return x * self.gate(x_se)\n        \n\nclass MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.reset_parameters()\n\n    def reset_parameters(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out\n\n\ndef transform_reference_points(reference_points, egopose, reverse=False, translation=True):\n    reference_points = torch.cat([reference_points, torch.ones_like(reference_points[..., 0:1])], dim=-1)\n    if reverse:\n        matrix = egopose.inverse()\n    else:\n        matrix = egopose\n    if not translation:\n        matrix[..., :3, 3] = 0.0\n    if reference_points.dim()==4:\n        B, N, K, C = reference_points.shape\n        reference_points = reference_points.view(B, N*K, C)\n        reference_points = (matrix.unsqueeze(1) @ reference_points.unsqueeze(-1)).squeeze(-1)[..., :3]\n        return reference_points.view(B, N, K, 3)\n    else:\n        reference_points = (matrix.unsqueeze(1) @ reference_points.unsqueeze(-1)).squeeze(-1)[..., :3]\n        return reference_points\n"
  },
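  {
    "path": "examples/query_pos_and_mln_sketch.py",
    "content": "# Hypothetical usage sketch; the file path and shapes are editorial assumptions. It only exercises helpers defined in\n# streampetr_utils.py above and assumes this repo's mmdet3d package is importable.\n# It shows how 3D reference points become sinusoidal query position embeddings, where the 180-dim ego-motion\n# code expected by the head's MLN(180) layers comes from, and how MLN modulates a feature with that code.\nimport torch\nimport torch.nn as nn\nfrom mmdet3d.models.fbbev.streampetr.streampetr_utils import MLN, nerf_positional_encoding, pos2posemb3d\n\nB, Q = 2, 900\nreference_points = torch.rand(B, Q, 3)                       # normalized (x, y, z) in [0, 1]\nquery_pos = pos2posemb3d(reference_points)                    # [B, Q, 384] = 3 axes * 128 features\nquery_pos = nn.Sequential(nn.Linear(384, 256), nn.ReLU(), nn.Linear(256, 256))(query_pos)  # stand-in for the head's query_embedding MLP\n\n# Ego-motion code: velocity (2) + timestamp (1) + top 3x4 block of the 4x4 ego pose (12) = 15 dims,\n# expanded by the NeRF-style encoding to 15 * 2 * 6 = 180 dims.\nvelo = torch.zeros(B, Q, 2)\ntimestamp = torch.zeros(B, Q, 1)\negopose = torch.eye(4).expand(B, Q, 4, 4)\nmotion = torch.cat([velo, timestamp, egopose[..., :3, :].flatten(-2)], dim=-1)  # [B, Q, 15]\nmotion_code = nerf_positional_encoding(motion)                # [B, Q, 180]\n\n# MLN: LayerNorm the feature, then scale and shift it with gamma/beta predicted from the code.\nego_pose_pe = MLN(c_dim=180, f_dim=256)\nprint(ego_pose_pe(query_pos, motion_code).shape)              # torch.Size([2, 900, 256])\n"
  },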
  {
    "path": "mmdet3d/models/fbbev/streampetr/streampetr_v2.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\nimport torch\nimport torch.nn as nn \nfrom mmcv.cnn import Linear, bias_init_with_prob, Scale\n\nfrom mmcv.runner import force_fp32\nfrom mmdet.core import (build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\nfrom .streampetr_utils import *\nimport copy\nfrom mmdet.models.utils import NormedLinear\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.fbbev.utils import save_tensor\n\n@HEADS.register_module()\nclass SparseHead4BEV(AnchorFreeHead):\n    \"\"\"Implements the DETR transformer head.\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n    Args:\n        num_classes (int): Number of categories excluding the background.\n        in_channels (int): Number of channels in the input feature map.\n        num_query (int): Number of query in Transformer.\n        num_reg_fcs (int, optional): Number of fully-connected layers used in\n            `FFN`, which is then used for the regression head. Default 2.\n        transformer (obj:`mmcv.ConfigDict`|dict): Config for transformer.\n            Default: None.\n        sync_cls_avg_factor (bool): Whether to sync the avg_factor of\n            all ranks. Default to False.\n        positional_encoding (obj:`mmcv.ConfigDict`|dict):\n            Config for position encoding.\n        loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the\n            classification loss. Default `CrossEntropyLoss`.\n        loss_bbox (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression loss. Default `L1Loss`.\n        loss_iou (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression iou loss. 
Default `GIoULoss`.\n        tran_cfg (obj:`mmcv.ConfigDict`|dict): Training config of\n            transformer head.\n        test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of\n            transformer head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n    _version = 2\n\n    def __init__(self,\n                 num_classes,\n                 in_channels=256,\n                 stride=[16],\n                 embed_dims=256,\n                 num_query=100,\n                 num_reg_fcs=2,\n                 memory_len=1024,\n                 topk_proposals=256,\n                 num_propagated=256,\n                 with_dn=True,\n                 with_ego_pos=True,\n                 match_with_velo=True,\n                 match_costs=None,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 code_weights=None,\n                 bbox_coder=None,\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     bg_cls_weight=0.1,\n                     use_sigmoid=False,\n                     loss_weight=1.0,\n                     class_weight=1.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),\n                 train_cfg=dict(\n                     assigner=dict(\n                         type='HungarianAssigner3D',\n                         cls_cost=dict(type='ClassificationCost', weight=1.),\n                         reg_cost=dict(type='BBoxL1Cost', weight=5.0),\n                         iou_cost=dict(\n                             type='IoUCost', iou_mode='giou', weight=2.0)),),\n                 test_cfg=dict(max_per_img=100),\n                 scalar = 5,\n                 noise_scale = 0.4,\n                 noise_trans = 0.0,\n                 dn_weight = 1.0,\n                 split = 0.5,\n                 init_cfg=None,\n                 normedlinear=False,\n                 different_heads=True,\n                 **kwargs):\n        # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,\n        # since it brings inconvenience when the initialization of\n        # `AnchorFreeHead` is called.\n        self.different_heads = different_heads\n        if 'code_size' in kwargs:\n            self.code_size = kwargs['code_size']\n        else:\n            self.code_size = 10\n        if code_weights is not None:\n            self.code_weights = code_weights\n        else:\n            self.code_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]\n\n        self.code_weights = self.code_weights[:self.code_size]\n\n        if match_costs is not None:\n            self.match_costs = match_costs\n        else:\n            self.match_costs = self.code_weights\n            \n        self.bg_cls_weight = 0\n        self.sync_cls_avg_factor = sync_cls_avg_factor\n        class_weight = loss_cls.get('class_weight', None)\n        if class_weight is not None and (self.__class__ is SparseHead):\n            assert isinstance(class_weight, float), 'Expected ' \\\n                'class_weight to have type float. 
Found ' \\\n                f'{type(class_weight)}.'\n            # NOTE following the official DETR rep0, bg_cls_weight means\n            # relative classification weight of the no-object class.\n            bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)\n            assert isinstance(bg_cls_weight, float), 'Expected ' \\\n                'bg_cls_weight to have type float. Found ' \\\n                f'{type(bg_cls_weight)}.'\n            class_weight = torch.ones(num_classes + 1) * class_weight\n            # set background class as the last indice\n            class_weight[num_classes] = bg_cls_weight\n            loss_cls.update({'class_weight': class_weight})\n            if 'bg_cls_weight' in loss_cls:\n                loss_cls.pop('bg_cls_weight')\n            self.bg_cls_weight = bg_cls_weight\n\n        if train_cfg:\n            assert 'assigner' in train_cfg, 'assigner should be provided '\\\n                'when train_cfg is set.'\n            assigner = train_cfg['assigner']\n\n\n            self.assigner = build_assigner(assigner)\n            # DETR sampling=False, so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.num_query = num_query\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.memory_len = memory_len\n        self.topk_proposals = topk_proposals\n        self.num_propagated = num_propagated\n        self.with_dn = with_dn\n        self.with_ego_pos = with_ego_pos\n        self.match_with_velo = match_with_velo\n        self.num_reg_fcs = num_reg_fcs\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.embed_dims = embed_dims\n        self.with_dn = with_dn\n        self.stride=stride\n\n        self.scalar = scalar\n        self.bbox_noise_scale = noise_scale\n        self.bbox_noise_trans = noise_trans\n        self.dn_weight = dn_weight\n        self.split = split \n\n        self.act_cfg = transformer.get('act_cfg',\n                                       dict(type='ReLU', inplace=True))\n        self.num_pred = transformer['decoder']['num_layers']\n        self.normedlinear = normedlinear\n        super(SparseHead4BEV, self).__init__(num_classes, in_channels, init_cfg = init_cfg)\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_iou = build_loss(loss_iou)\n\n        if self.loss_cls.use_sigmoid:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n\n        self.transformer = build_transformer(transformer)\n\n        self.code_weights = nn.Parameter(torch.tensor(\n            self.code_weights), requires_grad=False)\n\n        self.match_costs = nn.Parameter(torch.tensor(\n            self.match_costs), requires_grad=False)\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n        self.pc_range = nn.Parameter(torch.tensor(\n            self.bbox_coder.pc_range), requires_grad=False)\n\n\n        self._init_layers()\n        self.reset_memory()\n        self.count = 0\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n        cls_branch = []\n        for _ in range(self.num_reg_fcs):\n            cls_branch.append(Linear(self.embed_dims, self.embed_dims))\n            cls_branch.append(nn.LayerNorm(self.embed_dims))\n            
cls_branch.append(nn.ReLU(inplace=True))\n        if self.normedlinear:\n            cls_branch.append(NormedLinear(self.embed_dims, self.cls_out_channels))\n        else:\n            cls_branch.append(Linear(self.embed_dims, self.cls_out_channels))\n        fc_cls = nn.Sequential(*cls_branch)\n\n        reg_branch = []\n        for _ in range(self.num_reg_fcs):\n            reg_branch.append(Linear(self.embed_dims, self.embed_dims))\n            reg_branch.append(nn.ReLU())\n        reg_branch.append(Linear(self.embed_dims, self.code_size))\n        reg_branch = nn.Sequential(*reg_branch)\n\n        def _get_clones(module, N):\n            return nn.ModuleList([copy.deepcopy(module) for i in range(N)])\n\n        if self.different_heads:\n            self.cls_branches =_get_clones(fc_cls, self.num_pred)\n            self.reg_branches = _get_clones(reg_branch, self.num_pred)\n        else:\n            self.cls_branches = nn.ModuleList(\n                [fc_cls for _ in range(self.num_pred)])\n            self.reg_branches = nn.ModuleList(\n                [reg_branch for _ in range(self.num_pred)])\n\n        self.reference_points = nn.Embedding(self.num_query, 3)\n        if self.num_propagated > 0:\n            self.pseudo_reference_points = nn.Embedding(self.num_propagated, 3)\n\n\n        self.query_embedding = nn.Sequential(\n            nn.Linear(self.embed_dims*3//2, self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n\n        # self.spatial_alignment = MLN(14, use_ln=False)\n\n        self.time_embedding = nn.Sequential(\n            nn.Linear(self.embed_dims, self.embed_dims),\n            nn.LayerNorm(self.embed_dims)\n        )\n\n        # encoding ego pose\n        if self.with_ego_pos:\n            self.ego_pose_pe = MLN(180)\n            self.ego_pose_memory = MLN(180)\n\n    def temporal_alignment(self, query_pos, tgt, reference_points):\n        B = query_pos.size(0)\n\n        temp_reference_points = (self.memory_reference_point - self.pc_range[:3]) / (self.pc_range[3:6] - self.pc_range[0:3])\n        temp_pos = self.query_embedding(pos2posemb3d(temp_reference_points)) \n        temp_memory = self.memory_embedding\n        rec_ego_pose = torch.eye(4, device=query_pos.device).unsqueeze(0).unsqueeze(0).repeat(B, query_pos.size(1), 1, 1)\n\n\n        if self.with_ego_pos:\n            rec_ego_motion = torch.cat([torch.zeros_like(reference_points[...,:3]), rec_ego_pose[..., :3, :].flatten(-2)], dim=-1)\n            rec_ego_motion = nerf_positional_encoding(rec_ego_motion)\n            tgt = self.ego_pose_memory(tgt, rec_ego_motion)\n            query_pos = self.ego_pose_pe(query_pos, rec_ego_motion)\n            memory_ego_motion = torch.cat([self.memory_velo, self.memory_timestamp, self.memory_egopose[..., :3, :].flatten(-2)], dim=-1).float()\n            memory_ego_motion = nerf_positional_encoding(memory_ego_motion)\n\n            temp_pos = self.ego_pose_pe(temp_pos, memory_ego_motion)\n            temp_memory = self.ego_pose_memory(temp_memory, memory_ego_motion)\n\n        query_pos += self.time_embedding(pos2posemb1d(torch.zeros_like(reference_points[...,:1])))\n        temp_pos += self.time_embedding(pos2posemb1d(self.memory_timestamp).float())\n        \n\n\n        if self.num_propagated > 0:\n            tgt = torch.cat([tgt, temp_memory[:, :self.num_propagated]], dim=1)\n            query_pos = torch.cat([query_pos, temp_pos[:, :self.num_propagated]], dim=1)\n            reference_points = 
torch.cat([reference_points, temp_reference_points[:, :self.num_propagated]], dim=1)\n            rec_ego_pose = torch.eye(4, device=query_pos.device).unsqueeze(0).unsqueeze(0).repeat(B, query_pos.shape[1]+self.num_propagated, 1, 1)\n            temp_memory = temp_memory[:, self.num_propagated:]\n            temp_pos = temp_pos[:, self.num_propagated:]\n            temp_reference_points = temp_reference_points[:, self.num_propagated:]\n            \n        return tgt, query_pos, reference_points, temp_reference_points, temp_memory, temp_pos, rec_ego_pose\n\n    def prepare_for_dn(self, batch_size, reference_points, img_metas, gt_bboxes_3d, gt_labels_3d ):\n        if self.training and self.with_dn:\n            targets = [torch.cat((each.gravity_center, each.tensor[:, 3:]),dim=1) for each in gt_bboxes_3d ]\n            labels = [each for each in gt_labels_3d ]\n            known = [(torch.ones_like(t)).cuda() for t in labels]\n            know_idx = known\n            unmask_bbox = unmask_label = torch.cat(known)\n            #gt_num\n            known_num = [t.size(0) for t in targets]\n        \n            labels = torch.cat([t for t in labels])\n            boxes = torch.cat([t for t in targets])\n            batch_idx = torch.cat([torch.full((t.size(0), ), i) for i, t in enumerate(targets)])\n        \n            known_indice = torch.nonzero(unmask_label + unmask_bbox)\n            known_indice = known_indice.view(-1)\n            # add noise\n            known_indice = known_indice.repeat(self.scalar, 1).view(-1)\n            known_labels = labels.repeat(self.scalar, 1).view(-1).long().to(reference_points.device)\n            known_bid = batch_idx.repeat(self.scalar, 1).view(-1)\n            known_bboxs = boxes.repeat(self.scalar, 1).to(reference_points.device)\n            known_bbox_center = known_bboxs[:, :3].clone()\n            known_bbox_scale = known_bboxs[:, 3:6].clone()\n\n            if self.bbox_noise_scale > 0:\n                diff = known_bbox_scale / 2 + self.bbox_noise_trans\n                rand_prob = torch.rand_like(known_bbox_center) * 2 - 1.0\n                known_bbox_center += torch.mul(rand_prob,\n                                            diff) * self.bbox_noise_scale\n                known_bbox_center[..., 0:3] = (known_bbox_center[..., 0:3] - self.pc_range[0:3]) / (self.pc_range[3:6] - self.pc_range[0:3])\n\n                known_bbox_center = known_bbox_center.clamp(min=0.0, max=1.0)\n                mask = torch.norm(rand_prob, 2, 1) > self.split\n                known_labels[mask] = self.num_classes\n            \n            single_pad = int(max(known_num))\n            pad_size = int(single_pad * self.scalar)\n            padding_bbox = torch.zeros(pad_size, 3).to(reference_points.device)\n            padded_reference_points = torch.cat([padding_bbox, reference_points], dim=0).unsqueeze(0).repeat(batch_size, 1, 1)\n\n            if len(known_num):\n                map_known_indice = torch.cat([torch.tensor(range(num)) for num in known_num])  # [1,2, 1,2,3]\n                map_known_indice = torch.cat([map_known_indice + single_pad * i for i in range(self.scalar)]).long()\n            if len(known_bid):\n                padded_reference_points[(known_bid.long(), map_known_indice)] = known_bbox_center.to(reference_points.device)\n\n            tgt_size = pad_size + self.num_query\n            attn_mask = torch.ones(tgt_size, tgt_size).to(reference_points.device) < 0\n            # match query cannot see the reconstruct\n            
attn_mask[pad_size:, :pad_size] = True\n            # reconstruct cannot see each other\n            for i in range(self.scalar):\n                if i == 0:\n                    attn_mask[single_pad * i:single_pad * (i + 1), single_pad * (i + 1):pad_size] = True\n                if i == self.scalar - 1:\n                    attn_mask[single_pad * i:single_pad * (i + 1), :single_pad * i] = True\n                else:\n                    attn_mask[single_pad * i:single_pad * (i + 1), single_pad * (i + 1):pad_size] = True\n                    attn_mask[single_pad * i:single_pad * (i + 1), :single_pad * i] = True\n             \n            # update dn mask for temporal modeling\n            query_size = pad_size + self.num_query + self.num_propagated\n            tgt_size = pad_size + self.num_query + self.memory_len\n            temporal_attn_mask = torch.ones(query_size, tgt_size).to(reference_points.device) < 0\n            temporal_attn_mask[:attn_mask.size(0), :attn_mask.size(1)] = attn_mask \n            temporal_attn_mask[pad_size:, :pad_size] = True\n            attn_mask = temporal_attn_mask\n\n            mask_dict = {\n                'known_indice': torch.as_tensor(known_indice).long(),\n                'batch_idx': torch.as_tensor(batch_idx).long(),\n                'map_known_indice': torch.as_tensor(map_known_indice).long(),\n                'known_lbs_bboxes': (known_labels, known_bboxs),\n                'know_idx': know_idx,\n                'pad_size': pad_size\n            }\n            \n        else:\n            padded_reference_points = reference_points.unsqueeze(0).repeat(batch_size, 1, 1)\n            attn_mask = None\n            mask_dict = None\n\n        return padded_reference_points, attn_mask, mask_dict\n\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the transformer head.\"\"\"\n        # The initialization for transformer is important\n        nn.init.uniform_(self.reference_points.weight.data, 0, 1)\n        if self.num_propagated > 0:\n            nn.init.uniform_(self.pseudo_reference_points.weight.data, 0, 1)\n            self.pseudo_reference_points.weight.requires_grad = False\n        self.transformer.init_weights()\n        if self.loss_cls.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n            for m in self.cls_branches:\n                nn.init.constant_(m[-1].bias, bias_init)\n\n    def reset_memory(self):\n        self.memory_embedding = None\n        self.memory_reference_point = None\n        self.memory_timestamp = None\n        self.memory_egopose = None\n        self.memory_velo = None\n\n    def pre_update_memory(self, data):\n\n        x = 1-data['start_of_sequence'] # original prev_exist, so we need do `not`\n        B = x.size(0)\n        # refresh the memory when the scene changes\n        if self.memory_embedding is None:\n            self.memory_embedding = x.new_zeros(B, self.memory_len, self.embed_dims)\n            self.memory_reference_point = x.new_zeros(B, self.memory_len, 3)\n            self.memory_timestamp = x.new_zeros(B, self.memory_len, 1)\n            self.memory_egopose = x.new_zeros(B, self.memory_len, 4, 4)\n            self.memory_velo = x.new_zeros(B, self.memory_len, 2)\n        else:\n            self.memory_timestamp += data['timestamp'].unsqueeze(-1).unsqueeze(-1)\n            self.memory_egopose = data['ego_pose_inv'].unsqueeze(1) @ self.memory_egopose\n            self.memory_reference_point = transform_reference_points(self.memory_reference_point, 
data['ego_pose_inv'], reverse=False)\n\n            self.memory_timestamp = memory_refresh(self.memory_timestamp[:, :self.memory_len], x)\n            self.memory_reference_point = memory_refresh(self.memory_reference_point[:, :self.memory_len], x)\n            self.memory_embedding = memory_refresh(self.memory_embedding[:, :self.memory_len], x)\n            self.memory_egopose = memory_refresh(self.memory_egopose[:, :self.memory_len], x)\n            self.memory_velo = memory_refresh(self.memory_velo[:, :self.memory_len], x)\n        \n        # for the first frame, padding pseudo_reference_points (non-learnable)\n        if self.num_propagated > 0:\n            pseudo_reference_points = self.pseudo_reference_points.weight * (self.pc_range[3:6] - self.pc_range[0:3]) + self.pc_range[0:3]\n            self.memory_reference_point[:, :self.num_propagated]  = self.memory_reference_point[:, :self.num_propagated] + (1 - x).view(B, 1, 1) * pseudo_reference_points\n            self.memory_egopose[:, :self.num_propagated]  = self.memory_egopose[:, :self.num_propagated] + (1 - x).view(B, 1, 1, 1) * torch.eye(4, device=x.device)\n\n    def post_update_memory(self, data, rec_ego_pose, all_cls_scores, all_bbox_preds, outs_dec, mask_dict):\n        if self.training and mask_dict and mask_dict['pad_size'] > 0:\n            rec_reference_points = all_bbox_preds[:, :, mask_dict['pad_size']:, :3][-1]\n            rec_velo = all_bbox_preds[:, :, mask_dict['pad_size']:, -2:][-1]\n            rec_memory = outs_dec[:, :, mask_dict['pad_size']:, :][-1]\n            rec_score = all_cls_scores[:, :, mask_dict['pad_size']:, :][-1].sigmoid().topk(1, dim=-1).values[..., 0:1]\n            rec_timestamp = torch.zeros_like(rec_score, dtype=torch.float64)\n        else:\n            rec_reference_points = all_bbox_preds[..., :3][-1]\n            rec_velo = all_bbox_preds[..., -2:][-1]\n            rec_memory = outs_dec[-1]\n            rec_score = all_cls_scores[-1].sigmoid().topk(1, dim=-1).values[..., 0:1]\n            rec_timestamp = torch.zeros_like(rec_score, dtype=torch.float64)\n        \n        # topk proposals\n        _, topk_indexes = torch.topk(rec_score, self.topk_proposals, dim=1)\n        rec_timestamp = topk_gather(rec_timestamp, topk_indexes)\n        rec_reference_points = topk_gather(rec_reference_points, topk_indexes).detach()\n        rec_memory = topk_gather(rec_memory, topk_indexes).detach()\n        rec_ego_pose = topk_gather(rec_ego_pose, topk_indexes)\n        rec_velo = topk_gather(rec_velo, topk_indexes).detach()\n        \n\n        # if self.count == 1:\n        #     from IPython import embed\n        #     embed()\n        #     exit()\n        self.memory_embedding = torch.cat([rec_memory, self.memory_embedding], dim=1)\n        self.memory_timestamp = torch.cat([rec_timestamp, self.memory_timestamp], dim=1)\n        self.memory_egopose= torch.cat([rec_ego_pose, self.memory_egopose], dim=1)\n        self.memory_reference_point = torch.cat([rec_reference_points, self.memory_reference_point], dim=1)\n        self.memory_velo = torch.cat([rec_velo, self.memory_velo], dim=1)\n        # self.memory_reference_point_copy = self.memory_reference_point.clone()\n        self.memory_reference_point = transform_reference_points(self.memory_reference_point, data['ego_pose'], reverse=False)\n        self.memory_timestamp -= data['timestamp'].unsqueeze(-1).unsqueeze(-1)\n\n        self.memory_egopose = data['ego_pose'].unsqueeze(1) @ self.memory_egopose\n\n    def forward(self, input_dict, img_metas,  
gt_bboxes_3d=None, gt_labels_3d=None, debug_info=None):\n        \"\"\"Forward function.\n        Args:\n            mlvl_feats (tuple[Tensor]): Features from the upstream\n                network, each is a 5D-tensor with shape\n                (B, N, C, H, W).\n        Returns:\n            all_cls_scores (Tensor): Outputs from the classification head, \\\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                cls_out_channels should includes background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\n                head with normalized coordinate format (cx, cy, w, l, cz, h, theta, vx, vy). \\\n                Shape [nb_dec, bs, num_query, 9].\n        \"\"\"\n\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(input_dict['img_bev_feat'][0].device)\n\n        timestamp = torch.FloatTensor([\n            single_img_metas['timestamp'] \n            for single_img_metas in img_metas]).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        data = dict(\n            start_of_sequence = start_of_sequence,\n            timestamp = timestamp,\n            ego_pose_inv = ego_pose_inv,\n            ego_pose = ego_pose,\n        )\n\n        if input_dict['img_bev_feat'][0].dim() == 5:\n            mlvl_feats = [level.mean(-1) for level in input_dict['img_bev_feat']]\n        else:\n            mlvl_feats = input_dict['img_bev_feat']\n\n\n        self.pre_update_memory(data)\n        # mlvl_feats = data['img_feats']\n        B = mlvl_feats[0].size(0)\n        # points_to_draw = (self.memory_reference_point -  self.pc_range[0:3])/(self.pc_range[3:6] - self.pc_range[0:3]) * 128\n        # points_to_draw = points_to_draw[0, :, :2]\n        # # print(points_to_draw.shape)\n        # save_tensor(mlvl_feats[0].abs().std(1), f'bev_{self.count}.png')\n        # import cv2\n        # img = cv2.imread(f'bev_{self.count}.png')\n        # for i in range(10):\n        #     img = cv2.circle(img, center=points_to_draw[256*((self.count)%4)+i].cpu().numpy().astype(np.int), thickness=1, radius=1, color=(255,0,0))\n        # cv2.imwrite(f'a_{self.count}.png', img)\n        # self.count +=1\n        # if self.count == 10:\n        #     from IPython import embed\n        #     embed()\n        #     exit()\n\n        reference_points = self.reference_points.weight\n        dtype = reference_points.dtype\n\n        feat_flatten = []\n        spatial_flatten = []\n        for i in range(len(mlvl_feats)):\n            B, C, H, W = mlvl_feats[i].shape\n            mlvl_feat = mlvl_feats[i].reshape(B, C, -1).transpose(1, 2)\n            # mlvl_feat = self.spatial_alignment(mlvl_feat, mln_input)\n            feat_flatten.append(mlvl_feat.to(dtype))\n            spatial_flatten.append((H, W))\n        feat_flatten = torch.cat(feat_flatten, dim=1)\n        spatial_flatten = torch.as_tensor(spatial_flatten, dtype=torch.long, device=mlvl_feats[0].device)\n        level_start_index = torch.cat((spatial_flatten.new_zeros((1, )), spatial_flatten.prod(1).cumsum(0)[:-1]))\n        reference_points, attn_mask, mask_dict = 
self.prepare_for_dn(B, reference_points, img_metas,  gt_bboxes_3d, gt_labels_3d)\n        query_pos = self.query_embedding(pos2posemb3d(reference_points))\n        tgt = torch.zeros_like(query_pos)\n\n        # prepare for the tgt and query_pos using mln.\n        tgt, query_pos, reference_points, temp_reference_points, temp_memory, temp_pos, rec_ego_pose = self.temporal_alignment(query_pos, tgt, reference_points)\n        \n        init_reference_points = reference_points.clone()\n        outs_dec, intermediate_reference_points = self.transformer(tgt, query_pos, feat_flatten, spatial_flatten, level_start_index, temp_memory, \n                                    temp_pos, attn_mask, reference_points, self.pc_range, data, img_metas, reg_branches=self.reg_branches,\n                                    return_intermediate_pts=True,\n                                    query_embedding=self.query_embedding,\n                                    temp_reference_points=temp_reference_points)\n\n        outs_dec = torch.nan_to_num(outs_dec)\n        outputs_classes = []\n        outputs_coords = []\n        for lvl in range(outs_dec.shape[0]):\n            \n            outputs_class = self.cls_branches[lvl](outs_dec[lvl])\n            tmp = self.reg_branches[lvl](outs_dec[lvl])\n            if self.different_heads:\n                reference = inverse_sigmoid(intermediate_reference_points[lvl])\n            else:\n                reference = inverse_sigmoid(init_reference_points)\n            assert reference.shape[-1] == 3\n            tmp[..., 0:3] += reference[..., 0:3]\n            tmp[..., 0:3] = tmp[..., 0:3].sigmoid()\n            outputs_coord = tmp\n            outputs_classes.append(outputs_class)\n            outputs_coords.append(outputs_coord)\n\n        all_cls_scores = torch.stack(outputs_classes)\n        all_bbox_preds = torch.stack(outputs_coords)\n        all_bbox_preds[..., 0:3] = (all_bbox_preds[..., 0:3] * (self.pc_range[3:6] - self.pc_range[0:3]) + self.pc_range[0:3])\n        \n        # update the memory bank\n        self.post_update_memory(data, rec_ego_pose, all_cls_scores, all_bbox_preds, outs_dec, mask_dict)\n        if mask_dict and mask_dict['pad_size'] > 0:\n            output_known_class = all_cls_scores[:, :, :mask_dict['pad_size'], :]\n            output_known_coord = all_bbox_preds[:, :, :mask_dict['pad_size'], :]\n            outputs_class = all_cls_scores[:, :, mask_dict['pad_size']:, :]\n            outputs_coord = all_bbox_preds[:, :, mask_dict['pad_size']:, :]\n            mask_dict['output_known_lbs_bboxes']=(output_known_class, output_known_coord)\n            outs = {\n                'agent_queries': outs_dec[-1, :, mask_dict['pad_size']:, :],\n                'all_cls_scores': outputs_class,\n                'all_bbox_preds': outputs_coord,\n                'dn_mask_dict':mask_dict,\n            }\n        else:\n            outs = {\n                'agent_queries': outs_dec[-1],\n                'all_cls_scores': all_cls_scores,\n                'all_bbox_preds': all_bbox_preds,\n                'dn_mask_dict':None,\n            }\n\n        return outs\n\n\n    def prepare_for_loss(self, mask_dict):\n        \"\"\"\n        prepare dn components to calculate loss\n        Args:\n            mask_dict: a dict that contains dn information\n        \"\"\"\n        output_known_class, output_known_coord = mask_dict['output_known_lbs_bboxes']\n        known_labels, known_bboxs = mask_dict['known_lbs_bboxes']\n        map_known_indice = 
mask_dict['map_known_indice'].long()\n        known_indice = mask_dict['known_indice'].long().cpu()\n        batch_idx = mask_dict['batch_idx'].long()\n        bid = batch_idx[known_indice]\n        if len(output_known_class) > 0:\n            output_known_class = output_known_class.permute(1, 2, 0, 3)[(bid, map_known_indice)].permute(1, 0, 2)\n            output_known_coord = output_known_coord.permute(1, 2, 0, 3)[(bid, map_known_indice)].permute(1, 0, 2)\n        num_tgt = known_indice.numel()\n        return known_labels, known_bboxs, output_known_class, output_known_coord, num_tgt\n\n\n    def _get_target_single(self,\n                           cls_score,\n                           bbox_pred,\n                           gt_labels,\n                           gt_bboxes,\n                           gt_bboxes_ignore=None):\n        \"\"\"\"Compute regression and classification targets for one image.\n        Outputs from a single decoder layer of a single feature level are used.\n        Args:\n            cls_score (Tensor): Box score logits from a single decoder layer\n                for one image. Shape [num_query, cls_out_channels].\n            bbox_pred (Tensor): Sigmoid outputs from a single decoder layer\n                for one image, with normalized coordinate (cx, cy, w, h) and\n                shape [num_query, 4].\n            gt_bboxes (Tensor): Ground truth bboxes for one image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth class indexes for one image\n                with shape (num_gts, ).\n            gt_bboxes_ignore (Tensor, optional): Bounding boxes\n                which can be ignored. Default None.\n        Returns:\n            tuple[Tensor]: a tuple containing the following for one image.\n                - labels (Tensor): Labels of each image.\n                - label_weights (Tensor]): Label weights of each image.\n                - bbox_targets (Tensor): BBox targets of each image.\n                - bbox_weights (Tensor): BBox weights of each image.\n                - pos_inds (Tensor): Sampled positive indexes for each image.\n                - neg_inds (Tensor): Sampled negative indexes for each image.\n        \"\"\"\n\n        num_bboxes = bbox_pred.size(0)\n        # assigner and sampler\n\n        assign_result = self.assigner.assign(bbox_pred, cls_score, gt_bboxes,\n                                                gt_labels, gt_bboxes_ignore, self.match_costs, self.match_with_velo)\n        sampling_result = self.sampler.sample(assign_result, bbox_pred,\n                                              gt_bboxes)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n\n\n        # label targets\n        labels = gt_bboxes.new_full((num_bboxes, ),\n                                    self.num_classes,\n                                    dtype=torch.long)\n        label_weights = gt_bboxes.new_ones(num_bboxes)\n\n        # bbox targets\n        code_size = gt_bboxes.size(1)\n        bbox_targets = torch.zeros_like(bbox_pred)[..., :code_size]\n        bbox_weights = torch.zeros_like(bbox_pred)\n        # print(gt_bboxes.size(), bbox_pred.size())\n        # DETR\n        if sampling_result.num_gts > 0:\n            bbox_targets[pos_inds] = sampling_result.pos_gt_bboxes\n            bbox_weights[pos_inds] = 1.0\n            labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        return (labels, label_weights, bbox_targets, 
bbox_weights, \n                pos_inds, neg_inds)\n\n    def get_targets(self,\n                    cls_scores_list,\n                    bbox_preds_list,\n                    gt_bboxes_list,\n                    gt_labels_list,\n                    gt_bboxes_ignore_list=None):\n        \"\"\"\"Compute regression and classification targets for a batch image.\n        Outputs from a single decoder layer of a single feature level are used.\n        Args:\n            cls_scores_list (list[Tensor]): Box score logits from a single\n                decoder layer for each image with shape [num_query,\n                cls_out_channels].\n            bbox_preds_list (list[Tensor]): Sigmoid outputs from a single\n                decoder layer for each image, with normalized coordinate\n                (cx, cy, w, h) and shape [num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indexes for each\n                image with shape (num_gts, ).\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. Default None.\n        Returns:\n            tuple: a tuple containing the following targets.\n                - labels_list (list[Tensor]): Labels for all images.\n                - label_weights_list (list[Tensor]): Label weights for all \\\n                    images.\n                - bbox_targets_list (list[Tensor]): BBox targets for all \\\n                    images.\n                - bbox_weights_list (list[Tensor]): BBox weights for all \\\n                    images.\n                - num_total_pos (int): Number of positive samples in all \\\n                    images.\n                - num_total_neg (int): Number of negative samples in all \\\n                    images.\n        \"\"\"\n        assert gt_bboxes_ignore_list is None, \\\n            'Only supports for gt_bboxes_ignore setting to None.'\n        num_imgs = len(cls_scores_list)\n        gt_bboxes_ignore_list = [\n            gt_bboxes_ignore_list for _ in range(num_imgs)\n        ]\n\n        (labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single, cls_scores_list, bbox_preds_list,\n             gt_labels_list, gt_bboxes_list, gt_bboxes_ignore_list)\n        num_total_pos = sum((inds.numel() for inds in pos_inds_list))\n        num_total_neg = sum((inds.numel() for inds in neg_inds_list))\n        return (labels_list, label_weights_list, bbox_targets_list,\n                bbox_weights_list, num_total_pos, num_total_neg)\n\n    def loss_single(self,\n                    cls_scores,\n                    bbox_preds,\n                    gt_bboxes_list,\n                    gt_labels_list,\n                    gt_bboxes_ignore_list=None):\n        \"\"\"\"Loss function for outputs from a single decoder layer of a single\n        feature level.\n        Args:\n            cls_scores (Tensor): Box score logits from a single decoder layer\n                for all images. 
Shape [bs, num_query, cls_out_channels].\n            bbox_preds (Tensor): Sigmoid outputs from a single decoder layer\n                for all images, with normalized coordinate (cx, cy, w, h) and\n                shape [bs, num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indexes for each\n                image with shape (num_gts, ).\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. Default None.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components for outputs from\n                a single decoder layer.\n        \"\"\"\n        num_imgs = cls_scores.size(0)\n        cls_scores_list = [cls_scores[i] for i in range(num_imgs)]\n        bbox_preds_list = [bbox_preds[i] for i in range(num_imgs)]\n        cls_reg_targets = self.get_targets(cls_scores_list, bbox_preds_list,\n                                           gt_bboxes_list, gt_labels_list, \n                                           gt_bboxes_ignore_list)\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        labels = torch.cat(labels_list, 0)\n        label_weights = torch.cat(label_weights_list, 0)\n        bbox_targets = torch.cat(bbox_targets_list, 0)\n        bbox_weights = torch.cat(bbox_weights_list, 0)\n\n        # classification loss\n        cls_scores = cls_scores.reshape(-1, self.cls_out_channels)\n        # construct weighted avg_factor to match with the official DETR repo\n        cls_avg_factor = num_total_pos * 1.0 + \\\n            num_total_neg * self.bg_cls_weight\n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                cls_scores.new_tensor([cls_avg_factor]))\n\n        cls_avg_factor = max(cls_avg_factor, 1)\n        loss_cls = self.loss_cls(\n            cls_scores, labels, label_weights, avg_factor=cls_avg_factor)\n\n        # Compute the average number of gt boxes accross all gpus, for\n        # normalization purposes\n        num_total_pos = loss_cls.new_tensor([num_total_pos])\n        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()\n\n        # regression L1 loss\n        bbox_preds = bbox_preds.reshape(-1, bbox_preds.size(-1))\n        normalized_bbox_targets = normalize_bbox(bbox_targets, self.pc_range)\n        isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1)\n        bbox_weights = bbox_weights * self.code_weights\n\n        loss_bbox = self.loss_bbox(\n                bbox_preds[isnotnan, :10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=num_total_pos)\n\n        loss_cls = torch.nan_to_num(loss_cls)\n        loss_bbox = torch.nan_to_num(loss_bbox)\n        return loss_cls, loss_bbox\n\n   \n    def dn_loss_single(self,\n                    cls_scores,\n                    bbox_preds,\n                    known_bboxs,\n                    known_labels,\n                    num_total_pos=None):\n        \"\"\"\"Loss function for outputs from a single decoder layer of a single\n        feature level.\n        Args:\n            cls_scores (Tensor): Box score logits from a single decoder layer\n                for all images. 
Shape [bs, num_query, cls_out_channels].\n            bbox_preds (Tensor): Sigmoid outputs from a single decoder layer\n                for all images, with normalized coordinate (cx, cy, w, h) and\n                shape [bs, num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indexes for each\n                image with shape (num_gts, ).\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. Default None.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components for outputs from\n                a single decoder layer.\n        \"\"\"\n        # classification loss\n        cls_scores = cls_scores.reshape(-1, self.cls_out_channels)\n        # construct weighted avg_factor to match with the official DETR repo\n        cls_avg_factor = num_total_pos * 3.14159 / 6 * self.split * self.split  * self.split ### positive rate\n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                cls_scores.new_tensor([cls_avg_factor]))\n        bbox_weights = torch.ones_like(bbox_preds)\n        label_weights = torch.ones_like(known_labels)\n        cls_avg_factor = max(cls_avg_factor, 1)\n        loss_cls = self.loss_cls(\n            cls_scores, known_labels.long(), label_weights, avg_factor=cls_avg_factor)\n\n        # Compute the average number of gt boxes accross all gpus, for\n        # normalization purposes\n        num_total_pos = loss_cls.new_tensor([num_total_pos])\n        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()\n\n        # regression L1 loss\n        bbox_preds = bbox_preds.reshape(-1, bbox_preds.size(-1))\n        normalized_bbox_targets = normalize_bbox(known_bboxs, self.pc_range)\n        isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1)\n\n        bbox_weights = bbox_weights * self.code_weights\n\n        loss_bbox = self.loss_bbox(\n                bbox_preds[isnotnan, :10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=num_total_pos)\n\n        loss_cls = torch.nan_to_num(loss_cls)\n        loss_bbox = torch.nan_to_num(loss_bbox)\n        \n        return self.dn_weight * loss_cls, self.dn_weight * loss_bbox\n    \n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self,\n             gt_bboxes_list,\n             gt_labels_list,\n             preds_dicts,\n             img_metas=None,\n             gt_bboxes_ignore=None):\n        \"\"\"\"Loss function.\n        Args:\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indexes for each\n                image with shape (num_gts, ).\n            preds_dicts:\n                all_cls_scores (Tensor): Classification score of all\n                    decoder layers, has shape\n                    [nb_dec, bs, num_query, cls_out_channels].\n                all_bbox_preds (Tensor): Sigmoid regression\n                    outputs of all decode layers. 
Each is a 4D-tensor with\n                    normalized coordinate format (cx, cy, w, h) and shape\n                    [nb_dec, bs, num_query, 4].\n                enc_cls_scores (Tensor): Classification scores of\n                    points on encode feature map , has shape\n                    (N, h*w, num_classes). Only be passed when as_two_stage is\n                    True, otherwise is None.\n                enc_bbox_preds (Tensor): Regression results of each points\n                    on the encode feature map, has shape (N, h*w, 4). Only be\n                    passed when as_two_stage is True, otherwise is None.\n            gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                which can be ignored for each image. Default None.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            f'{self.__class__.__name__} only supports ' \\\n            f'for gt_bboxes_ignore setting to None.'\n\n        all_cls_scores = preds_dicts['all_cls_scores']\n        all_bbox_preds = preds_dicts['all_bbox_preds']\n\n        num_dec_layers = len(all_cls_scores)\n        device = gt_labels_list[0].device\n        gt_bboxes_list = [torch.cat(\n            (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),\n            dim=1).to(device) for gt_bboxes in gt_bboxes_list]\n\n        all_gt_bboxes_list = [gt_bboxes_list for _ in range(num_dec_layers)]\n        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]\n        all_gt_bboxes_ignore_list = [\n            gt_bboxes_ignore for _ in range(num_dec_layers)\n        ]\n\n        losses_cls, losses_bbox = multi_apply(\n            self.loss_single, all_cls_scores, all_bbox_preds,\n            all_gt_bboxes_list, all_gt_labels_list, \n            all_gt_bboxes_ignore_list)\n\n        loss_dict = dict()\n\n        # loss_dict['size_loss'] = size_loss\n        # loss from the last decoder layer\n        loss_dict['loss_cls'] = losses_cls[-1]\n        loss_dict['loss_bbox'] = losses_bbox[-1]\n\n        # loss from other decoder layers\n        num_dec_layer = 0\n        for loss_cls_i, loss_bbox_i in zip(losses_cls[:-1],\n                                           losses_bbox[:-1]):\n            loss_dict[f'd{num_dec_layer}.loss_cls'] = loss_cls_i\n            loss_dict[f'd{num_dec_layer}.loss_bbox'] = loss_bbox_i\n            num_dec_layer += 1\n        \n        if preds_dicts['dn_mask_dict'] is not None:\n            known_labels, known_bboxs, output_known_class, output_known_coord, num_tgt = self.prepare_for_loss(preds_dicts['dn_mask_dict'])\n            all_known_bboxs_list = [known_bboxs for _ in range(num_dec_layers)]\n            all_known_labels_list = [known_labels for _ in range(num_dec_layers)]\n            all_num_tgts_list = [\n                num_tgt for _ in range(num_dec_layers)\n            ]\n            \n            dn_losses_cls, dn_losses_bbox = multi_apply(\n                self.dn_loss_single, output_known_class, output_known_coord,\n                all_known_bboxs_list, all_known_labels_list, \n                all_num_tgts_list)\n            loss_dict['dn_loss_cls'] = dn_losses_cls[-1]\n            loss_dict['dn_loss_bbox'] = dn_losses_bbox[-1]\n            num_dec_layer = 0\n            for loss_cls_i, loss_bbox_i in zip(dn_losses_cls[:-1],\n                                            dn_losses_bbox[:-1]):\n                loss_dict[f'd{num_dec_layer}.dn_loss_cls'] = loss_cls_i\n              
  loss_dict[f'd{num_dec_layer}.dn_loss_bbox'] = loss_bbox_i\n                num_dec_layer += 1\n                \n        elif self.with_dn:\n            dn_losses_cls, dn_losses_bbox = multi_apply(\n                self.loss_single, all_cls_scores, all_bbox_preds,\n                all_gt_bboxes_list, all_gt_labels_list, \n                all_gt_bboxes_ignore_list)\n            loss_dict['dn_loss_cls'] = dn_losses_cls[-1].detach()\n            loss_dict['dn_loss_bbox'] = dn_losses_bbox[-1].detach()     \n            num_dec_layer = 0\n            for loss_cls_i, loss_bbox_i in zip(dn_losses_cls[:-1],\n                                            dn_losses_bbox[:-1]):\n                loss_dict[f'd{num_dec_layer}.dn_loss_cls'] = loss_cls_i.detach()     \n                loss_dict[f'd{num_dec_layer}.dn_loss_bbox'] = loss_bbox_i.detach()     \n                num_dec_layer += 1\n\n        return loss_dict, None\n\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_bboxes(self, preds_dicts, img_metas, rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        preds_dicts = self.bbox_coder.decode(preds_dicts, layer_index=-1)\n        num_samples = len(preds_dicts)\n\n        ret_list = []\n        for i in range(num_samples):\n            preds = preds_dicts[i]\n            bboxes = preds['bboxes']\n            bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5\n            bboxes = img_metas[i]['box_type_3d'](bboxes, bboxes.size(-1))\n            scores = preds['scores']\n            labels = preds['labels']\n            bbox_results = bbox3d2result(bboxes, scores, labels)\n            ret_list.append(bbox_results)\n        return ret_list\n\nclass MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256, use_ln=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.use_ln = use_ln\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        if self.use_ln:\n            self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.init_weight()\n\n    def init_weight(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        if self.use_ln:\n            x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/__init__.py",
    "content": "from .trackpetr import TackerHead\nfrom .losses.tracking_loss_combo import TrackingLossCombo\nfrom .track_nms_free_coder import TrackNMSFreeCoder"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/instances.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2023 toyota research instutute.\n# ------------------------------------------------------------------------\n# Modified from MOTR (https://github.com/megvii-model/MOTR/)\n# ------------------------------------------------------------------------\n# Modified from Detectron2 (https://github.com/facebookresearch/detectron2)\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n# ------------------------------------------------------------------------\n\nimport itertools\nfrom typing import Any, Dict, List, Tuple, Union\nimport torch\n\n\n    \ndef topk_gather(feat, topk_indexes):\n    if topk_indexes is not None:\n        feat_shape = feat.shape\n        topk_shape = topk_indexes.shape\n        \n        view_shape = [1 for _ in range(len(feat_shape))] \n        view_shape[:2] = topk_shape[:2]\n        topk_indexes = topk_indexes.view(*view_shape)\n        \n        feat = torch.gather(feat, 1, topk_indexes.repeat(1, 1, *feat_shape[2:]))\n    return feat\n\nclass Instances:\n    \"\"\"\n    This class represents a list of instances in an image.\n    It stores the attributes of instances (e.g., boxes, masks, labels, scores) as \"fields\".\n    All fields must have the same ``__len__`` which is the number of instances.\n    All other (non-field) attributes of this class are considered private:\n    they must start with '_' and are not modifiable by a user.\n    Some basic usage:\n    1. Set/get/check a field:\n       .. code-block:: python\n          instances.gt_boxes = Boxes(...)\n          print(instances.pred_masks)  # a tensor of shape (N, H, W)\n          print('gt_masks' in instances)\n    2. ``len(instances)`` returns the number of instances\n    3. Indexing: ``instances[indices]`` will apply the indexing on all the fields\n       and returns a new :class:`Instances`.\n       Typically, ``indices`` is a integer vector of indices,\n       or a binary mask of length ``num_instances``\n       .. 
code-block:: python\n          category_3_detections = instances[instances.pred_classes == 3]\n          confident_detections = instances[instances.scores > 0.9]\n    \"\"\"\n\n    def __init__(self, image_size: Tuple[int, int], **kwargs: Any):\n        \"\"\"\n        Args:\n            image_size (height, width): the spatial size of the image.\n            kwargs: fields to add to this `Instances`.\n        \"\"\"\n        self._image_size = image_size\n        self._fields: Dict[str, Any] = {}\n        for k, v in kwargs.items():\n            self.set(k, v)\n\n    @property\n    def image_size(self) -> Tuple[int, int]:\n        \"\"\"\n        Returns:\n            tuple: height, width\n        \"\"\"\n        return self._image_size\n\n    def __setattr__(self, name: str, val: Any) -> None:\n        if name.startswith(\"_\"):\n            super().__setattr__(name, val)\n        else:\n            self.set(name, val)\n\n    def __getattr__(self, name: str) -> Any:\n        if name == \"_fields\" or name not in self._fields:\n            raise AttributeError(\"Cannot find field '{}' in the given Instances!\".format(name))\n        return self._fields[name]\n\n    def set(self, name: str, value: Any) -> None:\n        \"\"\"\n        Set the field named `name` to `value`.\n        The length of `value` must be the number of instances,\n        and must agree with other existing fields in this object.\n        \"\"\"\n        data_len = len(value)\n        if len(self._fields):\n            assert (\n                len(self) == data_len\n            ), \"Adding a field of length {} to a Instances of length {}\".format(data_len, len(self))\n        self._fields[name] = value\n\n    def has(self, name: str) -> bool:\n        \"\"\"\n        Returns:\n            bool: whether the field called `name` exists.\n        \"\"\"\n        return name in self._fields\n\n    def remove(self, name: str) -> None:\n        \"\"\"\n        Remove the field called `name`.\n        \"\"\"\n        del self._fields[name]\n\n    def get(self, name: str) -> Any:\n        \"\"\"\n        Returns the field called `name`.\n        \"\"\"\n        return self._fields[name]\n\n    def get_fields(self) -> Dict[str, Any]:\n        \"\"\"\n        Returns:\n            dict: a dict which maps names (str) to data of the fields\n        Modifying the returned dict will modify this instance.\n        \"\"\"\n        return self._fields\n\n    # Tensor-like methods\n    def to(self, *args: Any, **kwargs: Any) -> \"Instances\":\n        \"\"\"\n        Returns:\n            Instances: all fields are called with a `to(device)`, if the field has this method.\n        \"\"\"\n        ret = Instances(self._image_size)\n        for k, v in self._fields.items():\n            if hasattr(v, \"to\"):\n                v = v.to(*args, **kwargs)\n            ret.set(k, v)\n        return ret\n\n    def numpy(self):\n        ret = Instances(self._image_size)\n        for k, v in self._fields.items():\n            if hasattr(v, \"numpy\"):\n                v = v.numpy()\n            ret.set(k, v)\n        return ret\n\n\n    def instances_topk_gather(self, topk_indexes, valid_key_set=None):\n        ret = Instances(self._image_size)\n        for k, v in self._fields.items():\n            if valid_key_set is not None and k not in valid_key_set: pass\n            else:\n                # print(k, v.shape)\n                v = topk_gather(v, topk_indexes)\n            ret.set(k, v)\n        return ret\n\n    def __getitem__(self, item: 
Union[int, slice, torch.BoolTensor]) -> \"Instances\":\n        \"\"\"\n        Args:\n            item: an index-like object and will be used to index all the fields.\n        Returns:\n            If `item` is a string, return the data in the corresponding field.\n            Otherwise, returns an `Instances` where all fields are indexed by `item`.\n        \"\"\"\n        if type(item) == int:\n            if item >= len(self) or item < -len(self):\n                raise IndexError(\"Instances index out of range!\")\n            else:\n                item = slice(item, None, len(self))\n\n        ret = Instances(self._image_size)\n        for k, v in self._fields.items():\n            # print(k, type(item), 'getitem', item.type(), item.dtype)\n            # if index by torch.BoolTensor\n            if k == 'kalman_models' and isinstance(item, torch.Tensor):\n                # print(item.shape, 'in get item')\n                ret_list = []\n                for i, if_true in enumerate(item):\n                    if if_true:\n                        ret_list.append(self.kalman_models[i])\n                ret.set(k, ret_list)\n\n            else:\n                ret.set(k, v[item])\n        return ret\n\n    def __len__(self) -> int:\n        for v in self._fields.values():\n            # use __len__ because len() has to be int and is not friendly to tracing\n            return v.__len__()\n        raise NotImplementedError(\"Empty Instances does not support __len__!\")\n\n    def __iter__(self):\n        raise NotImplementedError(\"`Instances` object is not iterable!\")\n\n    @staticmethod\n    def cat(instance_lists: List[\"Instances\"], dim=0) -> \"Instances\":\n        \"\"\"\n        Args:\n            instance_lists (list[Instances])\n        Returns:\n            Instances\n        \"\"\"\n        assert all(isinstance(i, Instances) for i in instance_lists)\n        assert len(instance_lists) > 0\n        if len(instance_lists) == 1:\n            return instance_lists[0]\n\n        image_size = instance_lists[0].image_size\n        for i in instance_lists[1:]:\n            assert i.image_size == image_size\n        ret = Instances(image_size)\n        for k in instance_lists[0]._fields.keys():\n            values = [i.get(k) for i in instance_lists]\n            v0 = values[0]\n            if isinstance(v0, torch.Tensor):\n                # print(k, values[0].shape, values[1].shape, dim)\n                try:\n                    values = torch.cat(values, dim=dim)\n                except:\n                    from IPython import embed\n                    embed()\n                    exit()\n            elif isinstance(v0, list):\n                values = list(itertools.chain(*values))\n            elif hasattr(type(v0), \"cat\"):\n                values = type(v0).cat(values)\n            else:\n                raise ValueError(\"Unsupported type {} for concatenation\".format(type(v0)))\n            ret.set(k, values)\n        return ret\n    \n    def clone(self):\n        ret = Instances(self._image_size)\n        for k, v in self._fields.items():\n            if hasattr(v, 'clone'):\n                v = v.clone()\n            ret.set(k, v)\n        return ret\n    \n    def detach(self):\n        ret = Instances(self._image_size)\n        for k, v in self._fields.items():\n            if hasattr(v, 'detach'):\n                v = v.detach()\n            ret.set(k, v)\n        return ret\n\n\n    def __str__(self) -> str:\n        s = self.__class__.__name__ + \"(\"\n        s 
+= \"num_instances={}, \\n\".format(len(self))\n        s += \"image_height={}, \\n\".format(self._image_size[0])\n        s += \"image_width={}, \\n\".format(self._image_size[1])\n        s += \"fields=[{}])\".format(\", \".join((f\"{k}: {v.shape}\\n\" for k, v in self._fields.items())))\n        return s\n\n    __repr__ = __str__"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/losses/__init__.py",
    "content": "from .tracking_loss_base import TrackingLossBase\nfrom .tracking_loss import TrackingLoss\nfrom .tracking_loss_prediction import TrackingLossPrediction\nfrom .tracking_loss_mem_bank import TrackingLossMemBank\nfrom .tracking_loss_combo import TrackingLossCombo\n"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/losses/tracking_loss.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) Toyota Research Institute\n# ------------------------------------------------------------------------\n# Modified from PETR (https://github.com/megvii-research/PETR)\n# Copyright (c) 2022 megvii-model. All Rights Reserved.\n# ------------------------------------------------------------------------\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.runner import force_fp32\nfrom mmdet.models import LOSSES\nfrom mmdet.models import build_loss\nfrom mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler)\nfrom mmdet3d.models.fbbev.track_head.streampetr_utils import normalize_bbox\nfrom .tracking_loss_base import TrackingLossBase\n\n\n@LOSSES.register_module()\nclass TrackingLoss(TrackingLossBase):\n    def __init__(self,\n                 *args,\n                 **kwargs):\n\n        super().__init__(*args, **kwargs)\n    \n    def loss_single_frame(self,\n                          frame_idx,\n                          gt_bboxes_list,\n                          gt_labels_list,\n                          instance_inds,\n                          preds_dicts,\n                          gt_bboxes_ignore):\n        \"\"\"Match according to both tracking and detection information\n           Generate the single frame loss function, modify the ids of track instances\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            f'{self.__class__.__name__} only supports ' \\\n            f'for gt_bboxes_ignore setting to None.'\n        \n        all_cls_scores = preds_dicts['all_cls_scores']\n        all_bbox_preds = preds_dicts['all_bbox_preds']\n        # enc_cls_scores = preds_dicts['enc_cls_scores']\n        # enc_bbox_preds = preds_dicts['enc_bbox_preds']\n        track_instances = preds_dicts['track_instances']\n\n        num_dec_layers, B, num_query = all_cls_scores.shape[:3]\n        device = gt_labels_list[0].device\n        # after this operation, [x, y, z-h/2] becomes [x, y, z]\n        gt_bboxes_list = [torch.cat(\n            (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),\n            dim=1).to(device) for gt_bboxes in gt_bboxes_list]\n        \n        obj_idxes_list = instance_inds[0].tolist()\n\n\n        obj_idx_to_gt_idx = [{obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)} for obj_idxes_list in instance_inds]\n\n        num_disappear_track = 0\n        # step 1. 
Inherit and Update the previous tracks\n        for batch_idx in range(B):\n            for trk_idx in range(num_query):\n                obj_id = track_instances.obj_idxes[batch_idx, trk_idx].item()\n                if obj_id >= 0:\n                    if obj_id in obj_idx_to_gt_idx[batch_idx]:\n                        track_instances.matched_gt_idxes[batch_idx, trk_idx] = obj_idx_to_gt_idx[batch_idx][obj_id]\n                    else:\n                        num_disappear_track += 1\n                        track_instances.matched_gt_idxes[batch_idx, trk_idx] = -2\n                else:\n                    track_instances.matched_gt_idxes[batch_idx, trk_idx] = -1\n       \n        full_track_idxes = torch.arange(num_query, dtype=torch.long)[None].repeat(B, 1).to(all_cls_scores.device)\n        # previsouly tracked, which is matched by rule\n        all_matched_track_idxes =  (track_instances.obj_idxes >= 0).nonzero()  # full_track_idxes[track_instances.obj_idxes >= 0]\n        matched_track_idxes =  (track_instances.matched_gt_idxes >= 0).nonzero() # full_track_idxes[track_instances.matched_gt_idxes >= 0]\n        \n        # step2. select the unmatched slots.\n        # note that the FP tracks whose obj_idxes are -2 will not be selected here.\n        unmatched_track_idxes = (track_instances.obj_idxes == -1).nonzero() # full_track_idxes[track_instances.obj_idxes == -1]\n        \n        m_idxes_list = [matched_track_idxes[matched_track_idxes[:, 0]==i][:, 1] for i in range(B)]\n        um_idxes_list = [unmatched_track_idxes[unmatched_track_idxes[:, 0]==i][:, 1] for i in range(B)]\n\n        # step3. select the untracked gt instances (new tracks).\n        tgt_state = [torch.zeros(len(gt_bboxes_list[i])).to(all_cls_scores.device) for i in range(B)]\n       \n        tgt_indexes_list = []\n        for i in range(B):   \n            tgt_indexes = track_instances.matched_gt_idxes[i]\n            tgt_indexes = tgt_indexes[tgt_indexes >= 0]\n            tgt_indexes_list.append(tgt_indexes)\n            tgt_state[i][tgt_indexes] = 1\n\n        # new tgt indexes\n        untracked_tgt_indexes = [torch.arange(len(gt_bboxes_list[i])).to(all_cls_scores.device)[tgt_state[i] == 0] for i in range(B)]\n\n\n        all_unmatched_gt_bboxes_list = [[gt_bboxes_list[i][untracked_tgt_indexes[i]] for i in range(B)] for _ in range(num_dec_layers)]\n        all_unmatched_gt_labels_list = [[gt_labels_list[i][untracked_tgt_indexes[i]] for i in range(B)] for _ in range(num_dec_layers)]\n        all_unmatched_gt_ids_list = [[torch.tensor(instance_inds[i], device=device)[untracked_tgt_indexes[i]] for i in range(B)] for _ in range(num_dec_layers)]\n        all_unmatched_ignore_list = [None for _ in range(num_dec_layers)]\n\n\n        # unmatched_cls_scores = []\n        # unmatched_bbox_preds = []\n        # for i in range(B):\n        #     unmatched_cls_scores.append(all_cls_scores[:, i, unmatched_track_idxes[unmatched_track_idxes[:, 0]==i]][:, 1])\n        #     unmatched_bbox_preds.append(all_bbox_preds[:, i, unmatched_track_idxes[unmatched_track_idxes[:, 0]==i]][:, 1])\n        # unmatched_cls_scores = all_cls_scores[:, :, unmatched_track_idxes, :]\n        # unmatched_bbox_preds = all_bbox_preds[:, :, unmatched_track_idxes, :]\n\n\n        # step4. 
do matching between the unmatched slots and GTs.\n        unmatched_track_matching_result = list()\n        for dec_layer_idx in range(num_dec_layers):\n            unmatched_cls_scores = []\n            unmatched_bbox_preds = []\n            for i in range(B):\n                um_idxes = um_idxes_list[i]\n                unmatched_cls_scores.append(all_cls_scores[dec_layer_idx, i, um_idxes])\n                unmatched_bbox_preds.append(all_bbox_preds[dec_layer_idx, i, um_idxes])\n\n            unmatched_track_dec_matching_result = self.get_targets(\n                unmatched_cls_scores,\n                unmatched_bbox_preds,\n                all_unmatched_gt_bboxes_list[dec_layer_idx],\n                all_unmatched_gt_labels_list[dec_layer_idx],\n                all_unmatched_gt_ids_list[dec_layer_idx],\n                all_unmatched_ignore_list[dec_layer_idx])\n\n            unmatched_track_matching_result.append(unmatched_track_dec_matching_result)\n            if dec_layer_idx == num_dec_layers - 1:\n                (labels_list, label_instance_ids_list, label_weights_list, bbox_targets_list,\n                    bbox_weights_list, num_total_pos, num_total_neg, gt_match_idxes_list) = unmatched_track_dec_matching_result\n        \n        # step5. update the obj_idxes according to the matching result with the last decoder layer\n        for i in range(B):\n            um_idxes = um_idxes_list[i]\n            track_instances.obj_idxes[i][um_idxes] = label_instance_ids_list[i]\n            track_instances.matched_gt_idxes[i][um_idxes] = gt_match_idxes_list[i]\n\n        # step6. merge the matching results of tracking/query instances\n        matched_labels = [gt_labels_list[i][tgt_indexes_list[i]].long() for i in range(B)]\n        matched_label_weights = [gt_labels_list[i].new_ones(len(tgt_indexes_list[i])).float()]\n        matched_bbox_targets = [gt_bboxes_list[i][tgt_indexes_list[i]] for i in range(B)]\n        matched_bbox_weights = [torch.ones_like(track_instances.bboxes[i])[:len(tgt_indexes_list[i])] for i in range(B)]\n\n        all_matching_list = list()\n        # matched_track_idxes = full_track_idxes[matched_track_idxes]\n        # unmatched_track_idxes = full_track_idxes[unmatched_track_idxes]\n\n        for dec_layer_idx in range(num_dec_layers):\n            (dec_labels, _, dec_label_weights, dec_bbox_targets,\n                dec_bbox_weights, dec_num_total_pos, dec_num_total_neg, _) = unmatched_track_matching_result[dec_layer_idx]\n\n            labels_list = []\n            label_weights_list = []\n            bbox_targets_list = []\n            bbox_weights_list = []\n            total_pos = dec_num_total_pos + len(matched_track_idxes)\n            total_neg = dec_num_total_neg + num_disappear_track\n            matched_gt_idxes_list = track_instances.obj_idxes.new_full((B, num_query), -1, dtype=torch.long)\n\n            for i in range(B):\n                m_idxes = m_idxes_list[i]\n                um_idxes = um_idxes_list[i]\n                labels = torch.ones_like(track_instances.obj_idxes[i]).long() * self.num_classes\n                labels[m_idxes] = matched_labels[i]\n                labels[um_idxes] = dec_labels[i]\n                labels_list.append(labels)\n            \n                label_weights = torch.ones_like(track_instances.obj_idxes[i]).float()\n                label_weights_list.append(label_weights)\n\n                bbox_targets = torch.zeros_like(track_instances.bboxes[i])[:, :dec_bbox_targets[i].size(1)]\n                
bbox_targets[m_idxes] = matched_bbox_targets[i]\n                bbox_targets[um_idxes] = dec_bbox_targets[i]\n                bbox_targets_list.append(bbox_targets)\n\n                bbox_weights = torch.zeros_like(track_instances.bboxes[i])\n                bbox_weights[m_idxes] = 1.0\n                bbox_weights[um_idxes] = dec_bbox_weights[i]\n                bbox_weights_list.append(bbox_weights)\n            \n                matched_gt_idxes_list[i][m_idxes] = track_instances.matched_gt_idxes[i][m_idxes]\n                matched_gt_idxes_list[i][um_idxes] = track_instances.matched_gt_idxes[i][um_idxes]\n\n            dec_matching_results = (labels_list, label_weights_list, bbox_targets_list,\n                                    bbox_weights_list, total_pos, total_neg, matched_gt_idxes_list)\n            all_matching_list.append(dec_matching_results)\n        \n        # step 7. compute the single frame losses\n        # after getting the matching result, we no longer need contents for gt_bboxes_list etc.\n        if self.interm_loss:\n            losses_cls, losses_bbox = multi_apply(\n               self.loss_single_decoder, [frame_idx for _ in range(num_dec_layers)], \n               all_cls_scores, all_bbox_preds,\n               [None for _ in range(num_dec_layers)], [None for _ in range(num_dec_layers)], \n               [None for _ in range(num_dec_layers)], [None for _ in range(num_dec_layers)], \n               all_matching_list)\n        else:\n            losses_cls, losses_bbox = self.loss_single_decoder(frame_idx,\n                all_cls_scores[-1], all_bbox_preds[-1],\n                None, None, None, None, all_matching_list[-1])\n            losses_cls, losses_bbox = [losses_cls], [losses_bbox]\n        \n        loss_dict = dict()\n\n        # loss from the last decoder layer\n        loss_dict[f'f{frame_idx}.loss_cls'] = losses_cls[-1]\n        loss_dict[f'f{frame_idx}.loss_bbox'] = losses_bbox[-1]\n\n        # loss from other decoder layers\n        num_dec_layer = 0\n        for loss_cls_i, loss_bbox_i in zip(losses_cls[:-1],\n                                           losses_bbox[:-1]):\n            loss_dict[f'f{frame_idx}.d{num_dec_layer}.loss_cls'] = loss_cls_i\n            loss_dict[f'f{frame_idx}.d{num_dec_layer}.loss_bbox'] = loss_bbox_i\n            num_dec_layer += 1\n\n        return loss_dict\n"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/losses/tracking_loss_base.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2023 toyota research instutute.\n# ------------------------------------------------------------------------\n# Modified from PETR (https://github.com/megvii-research/PETR)\n# Copyright (c) 2022 megvii-model. All Rights Reserved.\n# ------------------------------------------------------------------------\n# Modified from DETR3D (https://github.com/WangYueFt/detr3d)\n# Copyright (c) 2021 Wang, Yue\n# ------------------------------------------------------------------------\n# Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.runner import force_fp32\nfrom mmdet.models import LOSSES\nfrom mmdet.models import build_loss\nfrom mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler)\nfrom mmdet3d.models.fbbev.track_head.streampetr_utils import normalize_bbox\n\n@LOSSES.register_module()\nclass TrackingLossBase(nn.Module):\n    \"\"\" Naive multi-frame loss\n    \"\"\"\n    def __init__(self,\n                 num_classes,\n                 code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n                 sync_cls_avg_factor=False,\n                 interm_loss=True,\n                 loss_cls=dict(\n                    type='FocalLoss',\n                    use_sigmoid=True,\n                    gamma=2.0,\n                    alpha=0.25,\n                    loss_weight=2.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n                 loss_iou=dict(type='GIoULoss', loss_weight=0.0),\n                 assigner=dict(\n                    type='HungarianAssigner3D',\n                    cls_cost=dict(type='FocalLossCost', weight=2.0),\n                    reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                    iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 
\n                    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]),\n                    match_costs=None):\n\n        super().__init__()\n        self.num_classes = num_classes\n        self.interm_loss = interm_loss # if compute separate losses for all the decoders\n        self.assigner = build_assigner(assigner)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_iou = build_loss(loss_iou)\n        sampler_cfg = dict(type='PseudoSampler')\n        self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.pc_range = self.assigner.pc_range\n\n        if self.loss_cls.use_sigmoid:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n        \n        if code_weights is not None:\n            self.code_weights = code_weights\n        else:\n            self.code_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]\n        \n        if match_costs is not None:\n            self.match_costs = match_costs\n        else:\n            self.match_costs = self.code_weights\n\n        self.code_weights = nn.Parameter(torch.tensor(\n            self.code_weights, requires_grad=False), requires_grad=False)\n        \n        self.match_costs = nn.Parameter(torch.tensor(\n            self.match_costs), requires_grad=False)\n\n        self.bg_cls_weight = 0\n        self.sync_cls_avg_factor = sync_cls_avg_factor\n        class_weight = loss_cls.get('class_weight', None)\n        if class_weight is not None:\n            assert isinstance(class_weight, float), 'Expected ' \\\n                'class_weight to have type float. Found ' \\\n                f'{type(class_weight)}.'\n            # NOTE following the official DETR rep0, bg_cls_weight means\n            # relative classification weight of the no-object class.\n            bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)\n            assert isinstance(bg_cls_weight, float), 'Expected ' \\\n                'bg_cls_weight to have type float. Found ' \\\n                f'{type(bg_cls_weight)}.'\n            class_weight = torch.ones(num_classes + 1) * class_weight\n            # set background class as the last indice\n            class_weight[num_classes] = bg_cls_weight\n            loss_cls.update({'class_weight': class_weight})\n            if 'bg_cls_weight' in loss_cls:\n                loss_cls.pop('bg_cls_weight')\n            self.bg_cls_weight = bg_cls_weight\n    \n    def _get_target_single(self,\n                           cls_score,\n                           bbox_pred,\n                           gt_labels,\n                           gt_bboxes,\n                           instance_inds,\n                           gt_bboxes_ignore=None):\n        \"\"\"\"Compute regression and classification targets for one image.\n        Outputs from a single decoder layer of a single feature level are used.\n        Args:\n            cls_score (Tensor): Box score logits from a single decoder layer\n                for one image. 
Shape [num_query, cls_out_channels].\n            bbox_pred (Tensor): Sigmoid outputs from a single decoder layer\n                for one image, with normalized coordinate (cx, cy, w, h) and\n                shape [num_query, 4].\n            gt_bboxes (Tensor): Ground truth bboxes for one image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth class indices for one image\n                with shape (num_gts, ).\n            gt_bboxes_ignore (Tensor, optional): Bounding boxes\n                which can be ignored. Default None.\n        Returns:\n            tuple[Tensor]: a tuple containing the following for one image.\n                - labels (Tensor): Labels of each image.\n                - label_weights (Tensor]): Label weights of each image.\n                - bbox_targets (Tensor): BBox targets of each image.\n                - bbox_weights (Tensor): BBox weights of each image.\n                - pos_inds (Tensor): Sampled positive indices for each image.\n                - neg_inds (Tensor): Sampled negative indices for each image.\n        \"\"\"\n\n        num_bboxes = bbox_pred.size(0)\n        # assigner and sampler\n        assign_result = self.assigner.assign(bbox_pred, cls_score, gt_bboxes,\n                                             gt_labels, gt_bboxes_ignore, self.match_costs)\n        sampling_result = self.sampler.sample(assign_result, bbox_pred,\n                                              gt_bboxes)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n\n        # label targets\n        labels = gt_bboxes.new_full((num_bboxes, ),\n                                    self.num_classes,\n                                    dtype=torch.long)\n        label_instance_ids = gt_bboxes.new_full((num_bboxes,), -1, dtype=torch.long)\n        gt_match_idxes = gt_bboxes.new_full((num_bboxes,), -1, dtype=torch.long)\n        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds].long()\n        label_instance_ids[pos_inds] = instance_inds[sampling_result.pos_assigned_gt_inds].long()\n        gt_match_idxes[pos_inds] = sampling_result.pos_assigned_gt_inds.clone().long()\n        label_weights = gt_bboxes.new_ones(num_bboxes)\n\n        # bbox targets\n        code_size = gt_bboxes.size(1)\n        bbox_targets = torch.zeros_like(bbox_pred)[..., :code_size]\n        bbox_weights = torch.zeros_like(bbox_pred)\n        bbox_weights[pos_inds] = 1.0\n        \n        # hack for empty\n        if pos_inds.numel() == 0:\n            sampling_result.pos_gt_bboxes = gt_bboxes.new_empty((0, code_size))\n        bbox_targets[pos_inds] = sampling_result.pos_gt_bboxes\n        return (labels, label_instance_ids, label_weights, bbox_targets, bbox_weights, \n                pos_inds, neg_inds, gt_match_idxes)\n\n    def get_targets(self,\n                    cls_scores_list,\n                    bbox_preds_list,\n                    gt_bboxes_list,\n                    gt_labels_list,\n                    instance_ids_list,\n                    gt_bboxes_ignore_list=None):\n        \"\"\"\"Compute regression and classification targets for a batch image.\n        Outputs from a single decoder layer of a single feature level are used.\n        Args:\n            cls_scores_list (list[Tensor]): Box score logits from a single\n                decoder layer for each image with shape [num_query,\n                cls_out_channels].\n            bbox_preds_list (list[Tensor]): 
Sigmoid outputs from a single\n                decoder layer for each image, with normalized coordinate\n                (cx, cy, w, h) and shape [num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. Default None.\n        Returns:\n            tuple: a tuple containing the following targets.\n                - labels_list (list[Tensor]): Labels for all images.\n                - label_weights_list (list[Tensor]): Label weights for all \\\n                    images.\n                - bbox_targets_list (list[Tensor]): BBox targets for all \\\n                    images.\n                - bbox_weights_list (list[Tensor]): BBox weights for all \\\n                    images.\n                - num_total_pos (int): Number of positive samples in all \\\n                    images.\n                - num_total_neg (int): Number of negative samples in all \\\n                    images.\n        \"\"\"\n        assert gt_bboxes_ignore_list is None, \\\n            'Only supports for gt_bboxes_ignore setting to None.'\n        num_imgs = len(cls_scores_list)\n        gt_bboxes_ignore_list = [\n            gt_bboxes_ignore_list for _ in range(num_imgs)\n        ]\n\n        (labels_list, label_instance_ids_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, pos_inds_list, neg_inds_list, gt_match_idxes_list) = multi_apply(\n             self._get_target_single, cls_scores_list, bbox_preds_list,\n             gt_labels_list, gt_bboxes_list, instance_ids_list, gt_bboxes_ignore_list)\n        num_total_pos = sum((inds.numel() for inds in pos_inds_list))\n        num_total_neg = sum((inds.numel() for inds in neg_inds_list))\n        return (labels_list, label_instance_ids_list, label_weights_list, bbox_targets_list,\n                bbox_weights_list, num_total_pos, num_total_neg, gt_match_idxes_list)\n    \n    def loss_single_decoder(self,\n                            frame_idx,\n                            cls_scores,\n                            bbox_preds,\n                            gt_bboxes_list,\n                            gt_labels_list,\n                            instance_ids_list,\n                            gt_bboxes_ignore_list=None,\n                            gt_matching=None,\n                            aux_infos=None):\n        \"\"\"\"Loss function for outputs from a single decoder layer of a single\n        feature level. The sub-function of frame-level loss.\n        Args:\n            cls_scores (Tensor): Box score logits from a single decoder layer\n                for all images. 
Shape [bs, num_query, cls_out_channels].\n            bbox_preds (Tensor): Sigmoid outputs from a single decoder layer\n                for all images, with normalized coordinate (cx, cy, w, h) and\n                shape [bs, num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. Default None.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components for outputs from\n                a single decoder layer.\n        \"\"\"\n        num_imgs = cls_scores.size(0)\n        cls_scores_list = [cls_scores[i] for i in range(num_imgs)]\n        bbox_preds_list = [bbox_preds[i] for i in range(num_imgs)]\n        if gt_matching is None:\n            cls_reg_targets = self.get_targets(cls_scores_list, bbox_preds_list,\n                                               gt_bboxes_list, gt_labels_list, instance_ids_list,\n                                               gt_bboxes_ignore_list)\n            (labels_list, _, label_weights_list, bbox_targets_list, bbox_weights_list,\n             num_total_pos, num_total_neg, gt_match_idxes_list) = cls_reg_targets\n        else:\n            (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n             num_total_pos, num_total_neg, gt_match_idxes_list) = gt_matching\n        labels = torch.cat(labels_list, 0)\n        label_weights = torch.cat(label_weights_list, 0)\n        bbox_targets = torch.cat(bbox_targets_list, 0)\n        bbox_weights = torch.cat(bbox_weights_list, 0)\n\n        # classification loss\n        cls_scores = cls_scores.reshape(-1, self.cls_out_channels)\n        # construct weighted avg_factor to match with the official DETR repo\n        cls_avg_factor = num_total_pos * 1.0 + \\\n            num_total_neg * self.bg_cls_weight\n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                cls_scores.new_tensor([cls_avg_factor]))\n\n        cls_avg_factor = max(cls_avg_factor, 1)\n        loss_cls = self.loss_cls(\n            cls_scores, labels, label_weights, avg_factor=cls_avg_factor)\n\n        # Compute the average number of gt boxes accross all gpus, for\n        # normalization purposes\n        num_total_pos = loss_cls.new_tensor([num_total_pos])\n        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()\n\n        # regression L1 loss\n        bbox_preds = bbox_preds.reshape(-1, bbox_preds.size(-1))\n        normalized_bbox_targets = normalize_bbox(bbox_targets, self.pc_range)\n        isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1)\n        bbox_weights = bbox_weights * torch.tensor(self.code_weights).to(bbox_preds.device)\n\n        loss_bbox = self.loss_bbox(\n                bbox_preds[isnotnan, :10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=num_total_pos)\n\n        try:\n            loss_cls = torch.nan_to_num(loss_cls)\n            loss_bbox = torch.nan_to_num(loss_bbox)\n        except:\n            loss_cls = nan_to_num(loss_cls)\n            loss_bbox = nan_to_num(loss_bbox)\n\n        return loss_cls, loss_bbox\n    \n    def loss_single_frame(self,\n                          frame_idx,\n               
           gt_bboxes_list,\n                          gt_labels_list,\n                          instance_inds,\n                          preds_dicts,\n                          gt_bboxes_ignore):\n        \"\"\"Loss function on a single frame for classification and localization.\n        Args:\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            preds_dicts:\n                all_cls_scores (Tensor): Classification score of all\n                    decoder layers, has shape\n                    [nb_dec, bs, num_query, cls_out_channels].\n                all_bbox_preds (Tensor): Sigmoid regression\n                    outputs of all decode layers. Each is a 4D-tensor with\n                    normalized coordinate format (cx, cy, w, h) and shape\n                    [nb_dec, bs, num_query, 4].\n                enc_cls_scores (Tensor): Classification scores of\n                    points on encode feature map , has shape\n                    (N, h*w, num_classes). Only be passed when as_two_stage is\n                    True, otherwise is None.\n                enc_bbox_preds (Tensor): Regression results of each points\n                    on the encode feature map, has shape (N, h*w, 4). Only be\n                    passed when as_two_stage is True, otherwise is None.\n            gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                which can be ignored for each image. Default None.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            f'{self.__class__.__name__} only supports ' \\\n            f'for gt_bboxes_ignore setting to None.'\n        all_cls_scores = preds_dicts['all_cls_scores']\n        all_bbox_preds = preds_dicts['all_bbox_preds']\n        # enc_cls_scores = preds_dicts['enc_cls_scores']\n        # enc_bbox_preds = preds_dicts['enc_bbox_preds']\n\n        num_dec_layers = len(all_cls_scores)\n        device = gt_labels_list[0].device\n        gt_bboxes_list = [torch.cat(\n            (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),\n            dim=1).to(device) for gt_bboxes in gt_bboxes_list]\n\n        all_gt_bboxes_list = [gt_bboxes_list for _ in range(num_dec_layers)]\n        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]\n        all_instance_ids_list = [instance_inds for _ in range(num_dec_layers)]\n        all_gt_bboxes_ignore_list = [\n            gt_bboxes_ignore for _ in range(num_dec_layers)\n        ]\n\n        if self.interm_loss:\n            losses_cls, losses_bbox = multi_apply(\n                self.loss_single_decoder, [frame_idx for _ in range(num_dec_layers)], \n                all_cls_scores, all_bbox_preds,\n                all_gt_bboxes_list, all_gt_labels_list, all_instance_ids_list,\n                all_gt_bboxes_ignore_list)\n        else:\n            losses_cls, losses_bbox = self.loss_single_decoder(num_dec_layers - 1,\n                all_cls_scores[-1], all_bbox_preds[-1],\n                all_gt_bboxes_list[-1], all_gt_labels_list[-1], all_instance_ids_list[-1],\n                all_gt_bboxes_ignore_list[-1])\n            losses_cls, losses_bbox = [losses_cls], [losses_bbox]\n\n        loss_dict = dict()\n\n        # loss from the last decoder layer\n     
   loss_dict[f'f{frame_idx}.loss_cls'] = losses_cls[-1]\n        loss_dict[f'f{frame_idx}.loss_bbox'] = losses_bbox[-1]\n\n        # loss from other decoder layers\n        num_dec_layer = 0\n        for loss_cls_i, loss_bbox_i in zip(losses_cls[:-1],\n                                           losses_bbox[:-1]):\n            loss_dict[f'f{frame_idx}.d{num_dec_layer}.loss_cls'] = loss_cls_i\n            loss_dict[f'f{frame_idx}.d{num_dec_layer}.loss_bbox'] = loss_bbox_i\n            num_dec_layer += 1\n\n        return loss_dict\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def forward(self,\n                preds_dicts):\n        \"\"\"Loss function for multi-frame tracking\n        \"\"\"\n        frame_num = len(preds_dicts)\n        losses_dicts = [p.pop('loss_dict') for p in preds_dicts]\n        loss_dict = dict()\n        for key in losses_dicts[-1].keys():\n            # example loss_dict[\"d2.loss_cls\"] = losses_dicts[-1][\"f0.d2.loss_cls\"]\n            loss_dict[key[3:]] = losses_dicts[-1][key]\n        \n        for frame_loss in losses_dicts[:-1]:\n            loss_dict.update(frame_loss)\n\n        return loss_dict\n\n\ndef nan_to_num(x, nan=0.0, posinf=None, neginf=None):\n    # Fallback used when torch.nan_to_num is unavailable (older PyTorch versions).\n    x[torch.isnan(x)] = nan\n    if posinf is not None:\n        x[torch.isposinf(x)] = posinf\n    if neginf is not None:\n        x[torch.isneginf(x)] = neginf\n    return x"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/losses/tracking_loss_combo.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) Toyota Research Institute\n# ------------------------------------------------------------------------\n# Modified from PETR (https://github.com/megvii-research/PETR)\n# Copyright (c) 2022 megvii-model. All Rights Reserved.\n# ------------------------------------------------------------------------\n# Modified from DETR3D (https://github.com/WangYueFt/detr3d)\n# Copyright (c) 2021 Wang, Yue\n# ------------------------------------------------------------------------\n# Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.runner import force_fp32\nfrom mmdet.models import LOSSES\nfrom mmdet.models import build_loss\nfrom mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler)\nfrom mmdet3d.models.fbbev.track_head.streampetr_utils import normalize_bbox\nfrom .tracking_loss import TrackingLoss\n\n\n@LOSSES.register_module()\nclass TrackingLossCombo(TrackingLoss):\n    \"\"\" Tracking loss with reference point supervision\n    \"\"\"\n    def __init__(self,\n                 num_classes,\n                 code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n                 sync_cls_avg_factor=False,\n                 interm_loss=True,\n                 loss_cls=dict(\n                    type='FocalLoss',\n                    use_sigmoid=True,\n                    gamma=2.0,\n                    alpha=0.25,\n                    loss_weight=2.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n                 loss_iou=dict(type='GIoULoss', loss_weight=0.0),\n                 loss_prediction=dict(type='L1Loss', loss_weight=1.0),\n                 assigner=dict(\n                    type='HungarianAssigner3D',\n                    cls_cost=dict(type='FocalLossCost', weight=2.0),\n                    reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                    iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 
\n                    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])):\n\n        super(TrackingLoss, self).__init__(\n            num_classes, code_weights, sync_cls_avg_factor, interm_loss,\n            loss_cls, loss_bbox, loss_iou, assigner)\n        self.loss_traj = build_loss(loss_prediction)\n        self.loss_mem_cls = build_loss(loss_cls)\n        # self.loc_refine_code_weights = [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n        self.loc_refine_code_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]\n    \n    def loss_prediction(self,\n                        frame_idx,\n                        loss_dict,\n                        gt_trajs,\n                        gt_masks,\n                        pred_trajs,\n                        loss_key='for'):\n        loss_prediction = self.loss_traj(\n            gt_trajs[..., :2] * gt_masks.unsqueeze(-1), \n            pred_trajs[..., :2] * gt_masks.unsqueeze(-1))\n        loss_dict[f'f{frame_idx}.loss_{loss_key}'] = loss_prediction\n        return loss_dict\n    \n    def loss_mem_bank(self,\n                      frame_idx,\n                      loss_dict,\n                      gt_bboxes_list,\n                      gt_labels_list,\n                      instance_inds,\n                      track_instances):\n        obj_idxes_list = instance_inds[0].detach().cpu().numpy().tolist()\n        obj_idx_to_gt_idx = {obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)}\n        device = track_instances.query_feats.device\n\n        # classification loss\n        matched_labels = torch.ones((len(track_instances), ), dtype=torch.long, device=device) * self.num_classes\n        matched_label_weights = torch.ones((len(track_instances), ), dtype=torch.float32, device=device)\n        num_pos, num_neg = 0, 0\n        for track_idx, id in enumerate(track_instances.obj_idxes):\n            cpu_id = id.cpu().numpy().tolist()\n            if cpu_id not in obj_idx_to_gt_idx.keys():\n                num_neg += 1\n                continue\n            index = obj_idx_to_gt_idx[cpu_id]\n            matched_labels[track_idx] = gt_labels_list[0][index].long()\n            num_pos += 1\n\n        labels_list = matched_labels\n        label_weights_list = matched_label_weights\n        cls_scores = track_instances.cache_logits\n\n        cls_avg_factor = num_pos * 1.0 + \\\n            num_neg * self.bg_cls_weight\n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                cls_scores.new_tensor([cls_avg_factor]))\n        \n        cls_avg_factor = max(cls_avg_factor, 1)\n        loss_cls = self.loss_mem_cls(\n            cls_scores, labels_list, label_weights_list, avg_factor=cls_avg_factor)\n        loss_cls = torch.nan_to_num(loss_cls)\n\n        loss_dict[f'f{frame_idx}.loss_mem_cls'] = loss_cls\n\n        # location refinement loss\n        gt_bboxes_list = [torch.cat(\n            (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),\n            dim=1).to(device) for gt_bboxes in gt_bboxes_list]\n\n        pos_bbox_num = 0\n        matched_bbox_targets = torch.zeros((len(track_instances), gt_bboxes_list[0].shape[1]), dtype=torch.float32, device=device)\n        matched_bbox_weights = torch.zeros((len(track_instances),len(self.loc_refine_code_weights)), dtype=torch.float32, device=device)\n        for track_idx, id in enumerate(track_instances.obj_idxes):\n            cpu_id = id.cpu().numpy().tolist()\n            if cpu_id not in obj_idx_to_gt_idx.keys():\n                
matched_bbox_weights[track_idx] = 0.0\n                continue\n            index = obj_idx_to_gt_idx[cpu_id]\n            matched_bbox_targets[track_idx] = gt_bboxes_list[0][index].float()\n            matched_bbox_weights[track_idx] = 1.0\n            pos_bbox_num += 1\n\n        normalized_bbox_targets = normalize_bbox(matched_bbox_targets, self.pc_range)\n        isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1)\n        bbox_weights = matched_bbox_weights * torch.tensor(self.loc_refine_code_weights).to(device)\n\n        loss_bbox = self.loss_bbox(\n                track_instances.cache_bboxes[isnotnan, :10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=pos_bbox_num)\n        loss_dict[f'f{frame_idx}.loss_mem_bbox'] = loss_bbox\n        return loss_dict\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def forward(self,\n                preds_dicts):\n        \"\"\"Loss function for multi-frame tracking\n        \"\"\"\n        frame_num = len(preds_dicts)\n        losses_dicts = [p.pop('loss_dict') for p in preds_dicts]\n        loss_dict = dict()\n\n        for key in losses_dicts[-1].keys():\n            # example loss_dict[\"d2.loss_cls\"] = losses_dicts[-1][\"f0.d2.loss_cls\"]\n            loss_dict[key[3:]] = losses_dicts[-1][key]\n        \n        for frame_loss in losses_dicts[:-1]:\n            loss_dict.update(frame_loss)\n\n        return loss_dict\n\n\ndef nan_to_num(x, nan=0.0, posinf=None, neginf=None):\n    x[torch.isnan(x)] = nan\n    if posinf is not None:\n        x[torch.isposinf(x)] = posinf\n    if neginf is not None:\n        x[torch.isneginf(x)] = neginf\n    return x"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/losses/tracking_loss_mem_bank.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) Toyota Research Institute\n# ------------------------------------------------------------------------\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.runner import force_fp32\nfrom mmdet.models import LOSSES\nfrom mmdet.models import build_loss\nfrom mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler)\nfrom mmdet3d.models.fbbev.track_head.streampetr_utils import normalize_bbox\nfrom .tracking_loss import TrackingLoss\n\n\n@LOSSES.register_module()\nclass TrackingLossMemBank(TrackingLoss):\n    def __init__(self,\n                 num_classes,\n                 code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n                 sync_cls_avg_factor=False,\n                 interm_loss=True,\n                 loss_cls=dict(\n                    type='FocalLoss',\n                    use_sigmoid=True,\n                    gamma=2.0,\n                    alpha=0.25,\n                    loss_weight=2.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n                 loss_iou=dict(type='GIoULoss', loss_weight=0.0),\n                 assigner=dict(\n                    type='HungarianAssigner3D',\n                    cls_cost=dict(type='FocalLossCost', weight=2.0),\n                    reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                    iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. \n                    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])):\n\n        super(TrackingLoss, self).__init__(\n            num_classes, code_weights, sync_cls_avg_factor, interm_loss,\n            loss_cls, loss_bbox, loss_iou, assigner)\n        self.loss_mem_cls = build_loss(loss_cls)\n        self.loc_refine_code_weights = [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n    \n    def loss_mem_bank(self,\n                      frame_idx,\n                      loss_dict,\n                      gt_bboxes_list,\n                      gt_labels_list,\n                      instance_inds,\n                      track_instances):\n        obj_idxes_list = instance_inds[0].detach().cpu().numpy().tolist()\n        obj_idx_to_gt_idx = {obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)}\n        device = track_instances.output_embedding.device\n\n        # classification loss\n        matched_labels = torch.ones((len(track_instances), ), dtype=torch.long, device=device) * self.num_classes\n        matched_label_weights = torch.ones((len(track_instances), ), dtype=torch.float32, device=device)\n        num_pos, num_neg = 0, 0\n        for track_idx, id in enumerate(track_instances.obj_idxes):\n            cpu_id = id.cpu().numpy().tolist()\n            if cpu_id not in obj_idx_to_gt_idx.keys():\n                num_neg += 1\n                continue\n            index = obj_idx_to_gt_idx[cpu_id]\n            matched_labels[track_idx] = gt_labels_list[0][index].long()\n            num_pos += 1\n\n        labels_list = matched_labels\n        label_weights_list = matched_label_weights\n        cls_scores = track_instances.mem_pred_logits[:, -1, :]\n\n        cls_avg_factor = num_pos * 1.0 + \\\n            num_neg * self.bg_cls_weight\n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                cls_scores.new_tensor([cls_avg_factor]))\n        \n        cls_avg_factor = max(cls_avg_factor, 
1)\n        loss_cls = self.loss_mem_cls(\n            cls_scores, labels_list, label_weights_list, avg_factor=cls_avg_factor)\n        loss_cls = torch.nan_to_num(loss_cls)\n\n        loss_dict[f'f{frame_idx}.loss_mem_cls'] = loss_cls\n\n        # location refinement loss\n        gt_bboxes_list = [torch.cat(\n            (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),\n            dim=1).to(device) for gt_bboxes in gt_bboxes_list]\n\n        pos_bbox_num = 0\n        matched_bbox_targets = torch.zeros((len(track_instances), gt_bboxes_list[0].shape[1]), dtype=torch.float32, device=device)\n        matched_bbox_weights = torch.zeros((len(track_instances),len(self.loc_refine_code_weights)), dtype=torch.float32, device=device)\n        for track_idx, id in enumerate(track_instances.obj_idxes):\n            cpu_id = id.cpu().numpy().tolist()\n            if cpu_id not in obj_idx_to_gt_idx.keys():\n                matched_bbox_weights[track_idx] = 0.0\n                continue\n            index = obj_idx_to_gt_idx[cpu_id]\n            matched_bbox_targets[track_idx] = gt_bboxes_list[0][index].float()\n            matched_bbox_weights[track_idx] = 1.0\n            pos_bbox_num += 1\n\n        normalized_bbox_targets = normalize_bbox(matched_bbox_targets, self.pc_range)\n        isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1)\n        bbox_weights = matched_bbox_weights * torch.tensor(self.loc_refine_code_weights).to(device)\n\n        loss_bbox = self.loss_bbox(\n                track_instances.bbox_preds[isnotnan, :10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=pos_bbox_num)\n        loss_dict[f'f{frame_idx}.loss_mem_bbox'] = loss_bbox\n        return loss_dict\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def forward(self,\n                preds_dicts):\n        \"\"\"Loss function for multi-frame tracking\n        \"\"\"\n        frame_num = len(preds_dicts)\n        losses_dicts = [p.pop('loss_dict') for p in preds_dicts]\n        loss_dict = dict()\n        for key in losses_dicts[-1].keys():\n            # example loss_dict[\"d2.loss_cls\"] = losses_dicts[-1][\"f0.d2.loss_cls\"]\n            loss_dict[key[3:]] = losses_dicts[-1][key]\n        \n        for frame_loss in losses_dicts[:-1]:\n            loss_dict.update(frame_loss)\n\n        return loss_dict"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/losses/tracking_loss_prediction.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) Toyota Research Institute\n# ------------------------------------------------------------------------\n# Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.runner import force_fp32\nfrom mmdet.models import LOSSES\nfrom mmdet.models import build_loss\nfrom mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler)\nfrom mmdet3d.models.fbbev.track_head.streampetr_utils import normalize_bbox\nfrom .tracking_loss import TrackingLoss\n\n\n@LOSSES.register_module()\nclass TrackingLossPrediction(TrackingLoss):\n    \"\"\" Tracking loss with reference point supervision\n    \"\"\"\n    def __init__(self,\n                 num_classes,\n                 code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n                 sync_cls_avg_factor=False,\n                 interm_loss=True,\n                 loss_cls=dict(\n                    type='FocalLoss',\n                    use_sigmoid=True,\n                    gamma=2.0,\n                    alpha=0.25,\n                    loss_weight=2.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n                 loss_iou=dict(type='GIoULoss', loss_weight=0.0),\n                 loss_prediction=dict(type='L1Loss', loss_weight=1.0),\n                 assigner=dict(\n                    type='HungarianAssigner3D',\n                    cls_cost=dict(type='FocalLossCost', weight=2.0),\n                    reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                    iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 
\n                    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])):\n\n        super(TrackingLoss, self).__init__(\n            num_classes, code_weights, sync_cls_avg_factor, interm_loss,\n            loss_cls, loss_bbox, loss_iou, assigner)\n        self.loss_traj = build_loss(loss_prediction)\n    \n    def loss_prediction(self,\n                        frame_idx,\n                        loss_dict,\n                        gt_trajs,\n                        gt_masks,\n                        pred_trajs,\n                        loss_key='for'):\n        loss_prediction = self.loss_traj(\n            gt_trajs[..., :2] * gt_masks.unsqueeze(-1), \n            pred_trajs[..., :2] * gt_masks.unsqueeze(-1))\n        loss_dict[f'f{frame_idx}.loss_{loss_key}'] = loss_prediction\n        return loss_dict\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def forward(self,\n                preds_dicts):\n        \"\"\"Loss function for multi-frame tracking\n        \"\"\"\n        frame_num = len(preds_dicts)\n        losses_dicts = [p.pop('loss_dict') for p in preds_dicts]\n        loss_dict = dict()\n\n        for key in losses_dicts[-1].keys():\n            # example loss_dict[\"d2.loss_cls\"] = losses_dicts[-1][\"f0.d2.loss_cls\"]\n            loss_dict[key[3:]] = losses_dicts[-1][key]\n        \n        for frame_loss in losses_dicts[:-1]:\n            loss_dict.update(frame_loss)\n\n        return loss_dict\n\n\ndef nan_to_num(x, nan=0.0, posinf=None, neginf=None):\n    x[torch.isnan(x)] = nan\n    if posinf is not None:\n        x[torch.isposinf(x)] = posinf\n    if neginf is not None:\n        x[torch.isneginf(x)] = neginf\n    return x"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/runtime_tracker.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2023 toyota research instutute.\n# ------------------------------------------------------------------------\nfrom .instances import Instances\nimport torch\nimport numpy as np\n\n\nclass RunTimeTracker:\n    def __init__(self, output_threshold=0.2, score_threshold=0.4, \n                       max_age_since_update=1, **kwargs):\n        self.current_id = 1\n        self.current_seq = 0\n        self.timestamp = None\n        self.time_delta = None\n        self.query_embeddings = None\n        self.reference_points = None\n        self.frame_index = 0\n    \n        self.track_instances = None\n        self.timestamp = None\n        self.first_frame = None\n\n        self.threshold = score_threshold\n        self.output_threshold = output_threshold\n        self.max_age_since_update = max_age_since_update\n    \n    def update_active_tracks(self, track_instances, active_mask):\n        # first frame\n        if self.track_instances is None:\n            self.track_instances = track_instances[active_mask]\n            return\n        \n        live_mask = torch.zeros_like(track_instances.obj_idxes).bool().detach()\n        for i in range(len(track_instances)):\n            if active_mask[i]:\n                track_instances.disappear_time[i] = 0\n                live_mask[i] = True\n            elif track_instances.track_query_mask[i]:\n                track_instances.disappear_time[i] += 1\n                if track_instances.disappear_time[i] < self.max_age_since_update:\n                    live_mask[i] = True\n        self.track_instances = track_instances[live_mask]\n        return\n    \n    def get_active_mask(self, track_instances, training=True):\n        if training:\n            active_mask = (track_instances.matched_gt_idxes >= 0)\n        return active_mask\n    \n    def empty(self):\n        \"\"\"Copy the historical buffer parts from the init\n        \"\"\"\n        self.current_id = 1\n        self.current_seq = 0\n        self.timestamp = None\n        self.query_embeddings = None\n        self.reference_points = None\n        self.frame_index = 0\n    \n        self.track_instances = None\n        self.timestamp = None\n        self.first_frame = None"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/streampetr_utils.py",
    "content": "import torch\n\ndef normalize_bbox(bboxes, pc_range):\n    cx = bboxes[..., 0:1]\n    cy = bboxes[..., 1:2]\n    cz = bboxes[..., 2:3]\n    w = bboxes[..., 3:4].log()\n    l = bboxes[..., 4:5].log()\n    h = bboxes[..., 5:6].log()\n\n    rot = bboxes[..., 6:7]\n    if bboxes.size(-1) > 7:\n        vx = bboxes[..., 7:8] \n        vy = bboxes[..., 8:9]\n        normalized_bboxes = torch.cat(\n            (cx, cy, cz, w, l, h, rot.sin(), rot.cos(), vx, vy), dim=-1\n        )\n    else:\n        normalized_bboxes = torch.cat(\n            (cx, cy, cz, w, l, h, rot.sin(), rot.cos()), dim=-1\n        )\n    return normalized_bboxes\n\n# ------------------------------------------------------------------------\n# Copyright (c) 2022 megvii-model. All Rights Reserved.\n# ------------------------------------------------------------------------\n# Modified from mmdetection (https://github.com/open-mmlab/mmdetection)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\n#  Modified by Shihao Wang\n# ------------------------------------------------------------------------\nimport math\nimport torch\nimport torch.nn as nn \nimport numpy as np\n\ndef denormalize_bbox(normalized_bboxes, pc_range):\n    # rotation \n    rot_sine = normalized_bboxes[..., 6:7]\n\n    rot_cosine = normalized_bboxes[..., 7:8]\n    rot = torch.atan2(rot_sine, rot_cosine)\n\n    # center in the bev\n    cx = normalized_bboxes[..., 0:1]\n    cy = normalized_bboxes[..., 1:2]\n    cz = normalized_bboxes[..., 2:3]\n\n    # size\n    w = normalized_bboxes[..., 3:4]\n    l = normalized_bboxes[..., 4:5]\n    h = normalized_bboxes[..., 5:6]\n\n    w = w.exp() \n    l = l.exp() \n    h = h.exp() \n    if normalized_bboxes.size(-1) > 8:\n         # velocity \n        vx = normalized_bboxes[:, 8:9]\n        vy = normalized_bboxes[:, 9:10]\n        denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)\n    else:\n        denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)\n    return denormalized_bboxes\n    \ndef pos2posemb3d(pos, num_pos_feats=128, temperature=10000):\n    scale = 2 * math.pi\n    pos = pos * scale\n    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device)\n    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode='floor') / num_pos_feats)\n    pos_x = pos[..., 0, None] / dim_t\n    pos_y = pos[..., 1, None] / dim_t\n    pos_z = pos[..., 2, None] / dim_t\n    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2)\n    pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2)\n    pos_z = torch.stack((pos_z[..., 0::2].sin(), pos_z[..., 1::2].cos()), dim=-1).flatten(-2)\n    posemb = torch.cat((pos_y, pos_x, pos_z), dim=-1)\n    return posemb\n\ndef bevpos2posemb(pos, num_pos_feats=128, temperature=10000):\n    scale = 2 * math.pi\n    pos = pos * scale\n    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device)\n    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode='floor') / num_pos_feats)\n    pos_x = pos[..., 0, None] / dim_t\n    pos_y = pos[..., 1, None] / dim_t\n    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2)\n    pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2)\n    posemb = torch.cat((pos_y, pos_x), dim=-1)\n    return posemb\n\ndef pos2posemb1d(pos, num_pos_feats=256, 
temperature=10000):\n    scale = 2 * math.pi\n    pos = pos * scale\n    dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=pos.device)\n    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode='floor') / num_pos_feats)\n    pos_x = pos[..., 0, None] / dim_t\n\n    pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2)\n\n    return pos_x\n\ndef nerf_positional_encoding(\n    tensor, num_encoding_functions=6, include_input=False, log_sampling=True\n) -> torch.Tensor:\n    r\"\"\"Apply positional encoding to the input.\n    Args:\n        tensor (torch.Tensor): Input tensor to be positionally encoded.\n        encoding_size (optional, int): Number of encoding functions used to compute\n            a positional encoding (default: 6).\n        include_input (optional, bool): Whether or not to include the input in the\n            positional encoding (default: True).\n    Returns:\n    (torch.Tensor): Positional encoding of the input tensor.\n    \"\"\"\n    # TESTED\n    # Trivially, the input tensor is added to the positional encoding.\n    encoding = [tensor] if include_input else []\n    frequency_bands = None\n    if log_sampling:\n        frequency_bands = 2.0 ** torch.linspace(\n            0.0,\n            num_encoding_functions - 1,\n            num_encoding_functions,\n            dtype=tensor.dtype,\n            device=tensor.device,\n        )\n    else:\n        frequency_bands = torch.linspace(\n            2.0 ** 0.0,\n            2.0 ** (num_encoding_functions - 1),\n            num_encoding_functions,\n            dtype=tensor.dtype,\n            device=tensor.device,\n        )\n\n    for freq in frequency_bands:\n        for func in [torch.sin, torch.cos]:\n            encoding.append(func(tensor * freq))\n\n    # Special case, for no positional encoding\n    if len(encoding) == 1:\n        return encoding[0]\n    else:\n        return torch.cat(encoding, dim=-1)\n\n\nimport torch\nimport torch.nn as nn\nimport numpy as np\nfrom mmdet.core import bbox_xyxy_to_cxcywh\nfrom mmdet.models.utils.transformer import inverse_sigmoid\n\ndef memory_refresh(memory, prev_exist):\n    memory_shape = memory.shape\n    view_shape = [1 for _ in range(len(memory_shape))]\n    prev_exist = prev_exist.view(-1, *view_shape[1:]) \n    return memory * prev_exist\n    \ndef topk_gather(feat, topk_indexes):\n    if topk_indexes is not None:\n        feat_shape = feat.shape\n        topk_shape = topk_indexes.shape\n        \n        view_shape = [1 for _ in range(len(feat_shape))] \n        view_shape[:2] = topk_shape[:2]\n        topk_indexes = topk_indexes.view(*view_shape)\n        \n        feat = torch.gather(feat, 1, topk_indexes.repeat(1, 1, *feat_shape[2:]))\n    return feat\n\n\ndef apply_ltrb(locations, pred_ltrb): \n        \"\"\"\n        :param locations:  (1, H, W, 2)\n        :param pred_ltrb:  (N, H, W, 4) \n        \"\"\"\n        pred_boxes = torch.zeros_like(pred_ltrb)\n        pred_boxes[..., 0] = (locations[..., 0] - pred_ltrb[..., 0])# x1\n        pred_boxes[..., 1] = (locations[..., 1] - pred_ltrb[..., 1])# y1\n        pred_boxes[..., 2] = (locations[..., 0] + pred_ltrb[..., 2])# x2\n        pred_boxes[..., 3] = (locations[..., 1] + pred_ltrb[..., 3])# y2\n        min_xy = pred_boxes[..., 0].new_tensor(0)\n        max_xy = pred_boxes[..., 0].new_tensor(1)\n        pred_boxes  = torch.where(pred_boxes < min_xy, min_xy, pred_boxes)\n        pred_boxes  = torch.where(pred_boxes > max_xy, max_xy, pred_boxes)\n        
pred_boxes = bbox_xyxy_to_cxcywh(pred_boxes)\n\n\n        return pred_boxes    \n\ndef apply_center_offset(locations, center_offset): \n        \"\"\"\n        :param locations:  (1, H, W, 2)\n        :param pred_ltrb:  (N, H, W, 4) \n        \"\"\"\n        centers_2d = torch.zeros_like(center_offset)\n        locations = inverse_sigmoid(locations)\n        centers_2d[..., 0] = locations[..., 0] + center_offset[..., 0]  # x1\n        centers_2d[..., 1] = locations[..., 1] + center_offset[..., 1]  # y1\n        centers_2d = centers_2d.sigmoid()\n\n        return centers_2d\n\n@torch.no_grad()\ndef locations(features, stride, pad_h, pad_w):\n        \"\"\"\n        Arguments:\n            features:  (N, C, H, W)\n        Return:\n            locations:  (H, W, 2)\n        \"\"\"\n\n        h, w = features.size()[-2:]\n        device = features.device\n        \n        shifts_x = (torch.arange(\n            0, stride*w, step=stride,\n            dtype=torch.float32, device=device\n        ) + stride // 2 ) / pad_w\n        shifts_y = (torch.arange(\n            0, h * stride, step=stride,\n            dtype=torch.float32, device=device\n        ) + stride // 2) / pad_h\n        shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)\n        shift_x = shift_x.reshape(-1)\n        shift_y = shift_y.reshape(-1)\n        locations = torch.stack((shift_x, shift_y), dim=1)\n        \n        locations = locations.reshape(h, w, 2)\n        \n        return locations\n\n\n\ndef gaussian_2d(shape, sigma=1.0):\n    \"\"\"Generate gaussian map.\n\n    Args:\n        shape (list[int]): Shape of the map.\n        sigma (float, optional): Sigma to generate gaussian map.\n            Defaults to 1.\n\n    Returns:\n        np.ndarray: Generated gaussian map.\n    \"\"\"\n    m, n = [(ss - 1.) / 2. for ss in shape]\n    y, x = np.ogrid[-m:m + 1, -n:n + 1]\n\n    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))\n    h[h < np.finfo(h.dtype).eps * h.max()] = 0\n    return h\n\n\ndef draw_heatmap_gaussian(heatmap, center, radius, k=1):\n    \"\"\"Get gaussian masked heatmap.\n\n    Args:\n        heatmap (torch.Tensor): Heatmap to be masked.\n        center (torch.Tensor): Center coord of the heatmap.\n        radius (int): Radius of gaussian.\n        K (int, optional): Multiple of masked_gaussian. 
Defaults to 1.\n\n    Returns:\n        torch.Tensor: Masked heatmap.\n    \"\"\"\n    diameter = 2 * radius + 1\n    gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)\n\n    x, y = int(center[0]), int(center[1])\n\n    height, width = heatmap.shape[0:2]\n\n    left, right = min(x, radius), min(width - x, radius + 1)\n    top, bottom = min(y, radius), min(height - y, radius + 1)\n\n    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]\n    masked_gaussian = torch.from_numpy(\n        gaussian[radius - top:radius + bottom,\n                 radius - left:radius + right]).to(heatmap.device,\n                                                   torch.float32)\n    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:\n        torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)\n    return heatmap\n\nclass SELayer_Linear(nn.Module):\n    def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):\n        super().__init__()\n        self.conv_reduce = nn.Linear(channels, channels)\n        self.act1 = act_layer()\n        self.conv_expand = nn.Linear(channels, channels)\n        self.gate = gate_layer()\n\n    def forward(self, x, x_se):\n        x_se = self.conv_reduce(x_se)\n        x_se = self.act1(x_se)\n        x_se = self.conv_expand(x_se)\n        return x * self.gate(x_se)\n        \n\nclass MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.reset_parameters()\n\n    def reset_parameters(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out\n\n\ndef transform_reference_points(reference_points, egopose, reverse=False, translation=True):\n    reference_points = torch.cat([reference_points, torch.ones_like(reference_points[..., 0:1])], dim=-1)\n    if reverse:\n        matrix = egopose.inverse()\n    else:\n        matrix = egopose\n    if not translation:\n        matrix[..., :3, 3] = 0.0\n    if reference_points.dim()==4:\n        B, N, K, C = reference_points.shape\n        reference_points = reference_points.view(B, N*K, C)\n        reference_points = (matrix.unsqueeze(1) @ reference_points.unsqueeze(-1)).squeeze(-1)[..., :3]\n        return reference_points.view(B, N, K, -1)\n    else:\n        reference_points = (matrix.unsqueeze(1) @ reference_points.unsqueeze(-1)).squeeze(-1)[..., :3]\n        return reference_points\n\ndef transform_velo(velo, egopose, reverse=False, translation=False):\n    # velo = torch.cat([velo, torch.ones_like(velo[..., 0:1])], dim=-1)\n    if reverse:\n        matrix = egopose.inverse()\n    else:\n        matrix = egopose\n    if not translation:\n        matrix[..., :3, 3] = 0.0\n    if velo.dim()==4:\n        B, N, K, C = velo.shape\n        velo = velo.view(B, N*K, C)\n        velo = 
(matrix.unsqueeze(1)[..., :2, :2] @ velo.unsqueeze(-1)).squeeze(-1)\n        return velo.view(B, N, K, -1)\n    else:\n        velo = (matrix.unsqueeze(1)[..., :2, :2] @ velo.unsqueeze(-1)).squeeze(-1)\n        return velo\n"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/track_nms_free_coder.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2023 toyota research instutute.\n# ------------------------------------------------------------------------\n# Modified from DETR3D (https://github.com/WangYueFt/detr3d)\n# Copyright (c) 2021 Wang, Yue\n# ------------------------------------------------------------------------\n# Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\nimport torch\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom .streampetr_utils import denormalize_bbox\nimport torch.nn.functional as F\n\n\n@BBOX_CODERS.register_module()\nclass TrackNMSFreeCoder(BaseBBoxCoder):\n    \"\"\"Bbox coder for NMS-free detector. Including the fields for tracking\n    Args:\n        pc_range (list[float]): Range of point cloud.\n        post_center_range (list[float]): Limit of the center.\n            Default: None.\n        max_num (int): Max number to be kept. Default: 100.\n        score_threshold (float): Threshold to filter boxes based on score.\n            Default: None.\n        code_size (int): Code size of bboxes. Default: 9\n    \"\"\"\n\n    def __init__(self,\n                 pc_range,\n                 voxel_size=None,\n                 post_center_range=None,\n                 max_num=100,\n                 score_threshold=None,\n                 remove_ego_car=False,\n                 num_classes=10):\n        \n        self.pc_range = pc_range\n        self.voxel_size = voxel_size\n        self.post_center_range = post_center_range\n        self.max_num = max_num\n        self.score_threshold = score_threshold\n        self.num_classes = num_classes\n        self.remove_ego_car = remove_ego_car\n\n    def encode(self):\n        pass\n\n    def decode_single(self, cls_scores, bbox_preds, obj_idxes=None, track_scores=None, motion_forecasting=None, masks=None):\n        \"\"\"Decode bboxes.\n        Args:\n            cls_scores (Tensor): Outputs from the classification head, \\\n                shape [num_query, cls_out_channels]. Note \\\n                cls_out_channels should includes background.\n            bbox_preds (Tensor): Outputs from the regression \\\n                head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). 
\\\n                Shape [num_query, 9].\n            obj_idxes (Tensor): The idxes of the track instances\n            track_scores (Tensor): The scores of the bbox\n            motion_forecasting (Tensor): The predicted trajectories, [num_query, T, 2]\n            all_masks (Tensor): The masks for valid query output\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        max_num = self.max_num\n        cls_scores = cls_scores.sigmoid()\n\n        if masks is not None:\n            \"\"\"\n            If we remove the low scores \n            \"\"\"\n            # cls_scores = cls_scores[masks]\n            # bbox_preds = bbox_preds[masks]\n            # obj_idxes = obj_idxes[masks]\n\n            # track_scores = track_scores[masks]\n            det_scores = track_scores.clone()\n            track_scores[~masks] = -1.\n\n            #if motion_forecasting is not None:\n            #    motion_forecasting = motion_forecasting[masks]\n\n        # tracking mode decode\n        if obj_idxes is not None:\n            _, indexs = cls_scores.max(dim=-1)\n            labels = indexs % self.num_classes\n            _, bbox_index = det_scores.topk(min(max_num, len(obj_idxes)))\n            det_scores = det_scores[bbox_index]\n            track_scores = track_scores[bbox_index]\n            obj_idxes = obj_idxes[bbox_index]\n            bbox_preds = bbox_preds[bbox_index]\n            labels = labels[bbox_index]\n            # scores = track_scores\n            if motion_forecasting is not None:\n                motion_forecasting = motion_forecasting[bbox_index]\n        # detection mode decode\n        else:\n            cls_scores_topk = cls_scores.view(-1)\n            # scores, indexs = cls_scores_topk.topk(min(max_num, cls_scores_topk.size(0)))\n            # labels = indexs % self.num_classes\n            det_scores, indexs = cls_scores_topk.topk(min(max_num, cls_scores_topk.size(0)))\n            labels = indexs % self.num_classes\n            bbox_index = torch.div(indexs, self.num_classes, rounding_mode='floor')\n            bbox_preds = bbox_preds[bbox_index]\n\n        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)   \n        final_scores = det_scores\n        final_preds = labels\n        final_motion_forecasting = motion_forecasting\n\n        # use score threshold\n        if self.score_threshold is not None:\n            thresh_mask = final_scores >= self.score_threshold\n        if self.post_center_range is not None:\n            self.post_center_range = torch.tensor(self.post_center_range, device=final_scores.device)\n           \n            \n            mask = (final_box_preds[..., :3] >=\n                    self.post_center_range[:3]).all(1)\n            mask &= (final_box_preds[..., :3] <=\n                     self.post_center_range[3:]).all(1)\n\n            if self.remove_ego_car:\n                ego_range = torch.tensor([2.5, 1], device=final_scores.device)\n                mask &= (final_box_preds[..., :2].abs() >= ego_range).any(1)\n            if self.score_threshold:\n                mask &= thresh_mask\n\n            boxes3d = final_box_preds[mask]\n            det_scores = final_scores[mask]\n            labels = final_preds[mask]\n            if final_motion_forecasting is not None:\n                motion_forecasting = final_motion_forecasting[mask]\n            if obj_idxes is not None:\n                obj_idxes = obj_idxes[mask]\n            if track_scores is not None:\n                track_scores = 
track_scores[mask]\n\n            predictions_dict = {\n                'bboxes': boxes3d,\n                'scores': det_scores,\n                'labels': labels,\n                'track_scores': track_scores,\n                'obj_idxes': obj_idxes,\n                'forecasting': motion_forecasting\n            }\n\n        else:\n            raise NotImplementedError(\n                'Need to reorganize output as a batch, only '\n                'support post_center_range is not None for now!')\n        return predictions_dict\n\n    def decode(self, preds_dicts, layer_index=-1):\n        \"\"\"Decode bboxes.\n        Args:\n            all_cls_scores (Tensor): Outputs from the classification head, \\\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                cls_out_channels should includes background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\n                head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \\\n                Shape [nb_dec, bs, num_query, 9].\n            track_instances (Instances): Instances containing track information. \n                Available for tracking evaluation.\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        all_cls_scores = preds_dicts['all_cls_scores'][layer_index].clone()\n        all_bbox_preds = preds_dicts['all_bbox_preds'][layer_index].clone()\n        \n        batch_size = all_cls_scores.size()[0]\n        if 'track_instances' in preds_dicts.keys():\n            track_instances = preds_dicts['track_instances'].clone()\n            obj_idxes = track_instances.obj_idxes.clone()\n            track_scores = track_instances.scores.clone()\n            if 'all_masks' in preds_dicts.keys():\n                all_masks = preds_dicts['all_masks'].clone()\n            else:\n                all_masks = [None]\n\n            if 'all_motion_forecasting' in preds_dicts.keys() and preds_dicts['all_motion_forecasting'] is not None:\n                motion_forecasting = preds_dicts['all_motion_forecasting'].clone()\n            else:\n                motion_forecasting = [None]\n        else:\n            obj_idxes = [None for _ in range(batch_size)]\n            track_scores = [None for _ in range(batch_size)]\n            motion_forecasting = [None for _ in range(batch_size)]\n            all_masks = [None for _ in range(batch_size)]\n\n        predictions_list = []\n        for i in range(batch_size):\n            predictions_list.append(self.decode_single(\n                all_cls_scores[i], all_bbox_preds[i], obj_idxes[i], track_scores[i], \n                motion_forecasting[i], all_masks[i]))\n        return predictions_list"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/trackpetr.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\n\nimport torch\nimport torch.nn as nn \nfrom mmcv.cnn import Linear, bias_init_with_prob, Scale\n\nfrom mmcv.runner import force_fp32\nfrom mmdet.core import (build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.dense_heads.anchor_free_head import AnchorFreeHead\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\nfrom .streampetr_utils import *\nfrom .instances import Instances\nfrom .runtime_tracker import RunTimeTracker\nimport copy\nfrom mmdet.models.utils import NormedLinear\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.fbbev.utils import save_tensor\n\n@HEADS.register_module()\nclass TackerHead(AnchorFreeHead):\n    \"\"\"Implements the DETR transformer head.\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n    Args:\n        num_classes (int): Number of categories excluding the background.\n        in_channels (int): Number of channels in the input feature map.\n        num_query (int): Number of query in Transformer.\n        num_reg_fcs (int, optional): Number of fully-connected layers used in\n            `FFN`, which is then used for the regression head. Default 2.\n        transformer (obj:`mmcv.ConfigDict`|dict): Config for transformer.\n            Default: None.\n        sync_cls_avg_factor (bool): Whether to sync the avg_factor of\n            all ranks. Default to False.\n        positional_encoding (obj:`mmcv.ConfigDict`|dict):\n            Config for position encoding.\n        loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the\n            classification loss. Default `CrossEntropyLoss`.\n        loss_bbox (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression loss. Default `L1Loss`.\n        loss_iou (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression iou loss. 
Default `GIoULoss`.\n        tran_cfg (obj:`mmcv.ConfigDict`|dict): Training config of\n            transformer head.\n        test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of\n            transformer head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n    _version = 2\n\n    def __init__(self,\n                 num_classes,\n                 in_channels=256,\n                 stride=[16],\n                 embed_dims=256,\n                 num_query=100,\n                 num_reg_fcs=2,\n                 memory_len=1024,\n                 topk_proposals=256,\n                 num_propagated=256,\n                 with_dn=True,\n                 with_ego_pos=True,\n                 match_with_velo=True,\n                 match_costs=None,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 code_weights=None,\n                 bbox_coder=None,\n                loss=dict(\n                    type='TrackingLossCombo',\n                    num_classes=10,\n                    interm_loss=True,\n                    code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n                    loss_cls=dict(\n                        type='FocalLoss',\n                        use_sigmoid=True,\n                        gamma=2.0,\n                        alpha=0.25,\n                        loss_weight=2.0),\n                    loss_bbox=dict(type='L1Loss', loss_weight=0.25),\n                    loss_iou=dict(type='GIoULoss', loss_weight=0.0),\n                    # loss_prediction=dict(type='L1Loss', loss_weight=0.5),\n                    assigner=dict(\n                        type='HungarianAssigner3D',\n                        cls_cost=dict(type='FocalLossCost', weight=2.0),\n                        reg_cost=dict(type='BBox3DL1Cost', weight=0.25),\n                        iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 
\n                    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])\n                    ),\n                    train_cfg=dict(\n                     assigner=dict(\n                         type='HungarianAssigner3D',\n                         cls_cost=dict(type='ClassificationCost', weight=1.),\n                         reg_cost=dict(type='BBoxL1Cost', weight=5.0),\n                         iou_cost=dict(\n                             type='IoUCost', iou_mode='giou', weight=2.0)),),\n                 test_cfg=dict(max_per_img=100),\n                 scalar = 5,\n                 noise_scale = 0.4,\n                 noise_trans = 0.0,\n                 dn_weight = 1.0,\n                 split = 0.5,\n                 init_cfg=None,\n                 normedlinear=False,\n                 runtime_tracker=dict(\n                        output_threshold=0.2,\n                        score_threshold=0.2,\n                        record_threshold=0.4,\n                        max_age_since_update=7),\n                 tracking=True,\n                 layer_index=-1,\n                 **kwargs):\n        # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,\n        # since it brings inconvenience when the initialization of\n        # `AnchorFreeHead` is called.\n            \n        self.num_query = num_query\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.memory_len = memory_len\n        self.topk_proposals = topk_proposals\n        self.num_propagated = num_propagated\n        self.with_dn = with_dn\n        self.with_ego_pos = with_ego_pos\n        self.match_with_velo = match_with_velo\n        self.num_reg_fcs = num_reg_fcs\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.embed_dims = embed_dims\n        self.with_dn = with_dn\n        self.stride=stride\n        self.layer_index = layer_index\n        if 'code_size' in kwargs:\n            self.code_size = kwargs['code_size']\n        else:\n            self.code_size = 10\n\n        self.scalar = scalar\n        self.bbox_noise_scale = noise_scale\n        self.bbox_noise_trans = noise_trans\n        self.dn_weight = dn_weight\n        self.split = split \n\n        self.act_cfg = transformer.get('act_cfg',\n                                       dict(type='ReLU', inplace=True))\n        self.num_pred = transformer['decoder']['num_layers']\n        self.normedlinear = normedlinear\n        self.tracking = tracking \n        super(TackerHead, self).__init__(num_classes, in_channels, init_cfg = init_cfg)\n\n        self.criterion = build_loss(loss)\n\n        self.transformer = build_transformer(transformer)\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n        self.pc_range = nn.Parameter(torch.tensor(\n            self.bbox_coder.pc_range), requires_grad=False)\n\n        self._init_layers()\n        self.reset_history_track_instances()\n        \n        self.count = 0\n        \n        self.hist_len = 4\n        # self.fut_len = 8\n        if runtime_tracker:\n            self.runtime_tracker = RunTimeTracker(**runtime_tracker)\n            self.runtime_tracker.empty()\n\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n        cls_branch = []\n        for _ in range(self.num_reg_fcs):\n            cls_branch.append(Linear(self.embed_dims, self.embed_dims))\n            cls_branch.append(nn.LayerNorm(self.embed_dims))\n            
cls_branch.append(nn.ReLU(inplace=True))\n        if self.normedlinear:\n            cls_branch.append(NormedLinear(self.embed_dims, self.cls_out_channels))\n        else:\n            cls_branch.append(Linear(self.embed_dims, self.cls_out_channels))\n        fc_cls = nn.Sequential(*cls_branch)\n\n        reg_branch = []\n        for _ in range(self.num_reg_fcs):\n            reg_branch.append(Linear(self.embed_dims, self.embed_dims))\n            reg_branch.append(nn.ReLU())\n        reg_branch.append(Linear(self.embed_dims, self.code_size))\n        reg_branch = nn.Sequential(*reg_branch)\n\n        # self.cls_branches = nn.ModuleList(\n        #     [fc_cls for _ in range(self.num_pred)])\n        # self.reg_branches = nn.ModuleList(\n        #     [reg_branch for _ in range(self.num_pred)])\n\n        def _get_clones(module, N):\n            return nn.ModuleList([copy.deepcopy(module) for i in range(N)])\n\n\n        self.cls_branches =_get_clones(fc_cls, self.num_pred)\n        self.reg_branches = _get_clones(reg_branch, self.num_pred)\n        self.reference_points = nn.Embedding(self.num_query, 3)\n        if self.num_propagated > 0:\n            self.pseudo_reference_points = nn.Embedding(self.num_propagated, 3)\n\n\n        self.query_embedding = nn.Sequential(\n            nn.Linear(self.embed_dims*3//2, self.embed_dims),\n            nn.ReLU(),\n            nn.Linear(self.embed_dims, self.embed_dims),\n        )\n        if self.tracking:\n            self.query_feat_embedding = nn.Embedding(self.num_query, self.embed_dims)\n        # self.spatial_alignment = MLN(14, use_ln=False)\n\n        self.time_embedding = nn.Sequential(\n            nn.Linear(self.embed_dims, self.embed_dims),\n            nn.LayerNorm(self.embed_dims)\n        )\n\n        # encoding ego pose\n        if self.with_ego_pos:\n            self.ego_pose_pe = MLN(180)\n            self.ego_pose_memory = MLN(180)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the transformer head.\"\"\"\n        # The initialization for transformer is important\n        nn.init.uniform_(self.reference_points.weight.data, 0, 1)\n        if self.num_propagated > 0:\n            nn.init.uniform_(self.pseudo_reference_points.weight.data, 0, 1)\n            self.pseudo_reference_points.weight.requires_grad = False\n        self.transformer.init_weights()\n        if self.loss_cls.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n            for m in self.cls_branches:\n                nn.init.constant_(m[-1].bias, bias_init)\n\n\n    def reset_history_track_instances(self):\n        self.history_track_instances = None\n\n    def generate_empty_instance(self, B, init_memory_instances=False):\n        \"\"\"Generate empty instance slots at the beginning of tracking\"\"\"\n        track_instances = Instances((1, 1))\n        device = self.reference_points.weight.device\n        \"\"\"Detection queries\"\"\"\n        # reference points, query embeds, and query targets (features)\n        if init_memory_instances:\n            reference_points = self.reference_points.weight.new_zeros(self.memory_len, 3)[None].repeat(B, 1, 1)\n            len_track_instances = self.memory_len\n        else:\n            reference_points = self.reference_points.weight[None].repeat(B, 1, 1)\n            len_track_instances = self.num_query\n        query_pos = self.query_embedding(pos2posemb3d(reference_points))\n        track_instances.reference_points = reference_points.clone()\n        track_instances.query_pos = 
query_pos.clone()\n        if self.tracking:\n            if init_memory_instances:\n                track_instances.query_feats = self.query_feat_embedding.weight.new_zeros(len_track_instances, self.embed_dims)[None].repeat(B, 1, 1)\n            else:\n                track_instances.query_feats = self.query_feat_embedding.weight.clone()[None].repeat(B, 1, 1)     \n        else:\n            track_instances.query_feats = torch.zeros_like(query_pos)\n\n        \"\"\" StreamPETR memory information\"\"\"\n        track_instances.timestamp = torch.zeros(B, len_track_instances, 1, dtype=torch.float, device=device)\n        track_instances.ego_pose = torch.zeros(B, len_track_instances, 4, 4, dtype=torch.float, device=device)\n        track_instances.velo = torch.zeros(B, len_track_instances, 2, dtype=torch.float, device=device)\n\n        \"\"\"Tracking information\"\"\"\n        # id for the tracks\n        track_instances.obj_idxes = torch.full(\n            (B, len_track_instances,), -1, dtype=torch.long, device=device)\n        # matched gt indexes, for loss computation\n        track_instances.matched_gt_idxes = torch.full(\n            (B, len_track_instances,), -1, dtype=torch.long, device=device)\n        # life cycle management\n        track_instances.disappear_time = torch.zeros(\n            (B, len_track_instances, ), dtype=torch.long, device=device)\n        track_instances.track_query_mask = torch.zeros(\n            (B, len_track_instances, ), dtype=torch.bool, device=device)\n        \n        \"\"\"Current frame information\"\"\"\n        # classification scores\n        track_instances.logits = torch.zeros(\n            (B, len_track_instances, self.num_classes), dtype=torch.float, device=device)\n        # bounding boxes\n        track_instances.bboxes = torch.zeros(\n            (B, len_track_instances, 10), dtype=torch.float, device=device)\n        # track scores, normally the scores for the highest class\n        track_instances.scores = torch.zeros(\n            (B, len_track_instances, 1), dtype=torch.float, device=device)\n        \n        # # motion prediction, not normalized\n        # track_instances.motion_predictions = torch.zeros(\n        #     (B, len_track_instances, self.fut_len, 2), dtype=torch.float, device=device)\n        # \"\"\"Cache for current frame information, loading temporary data for spatial-temporal reasoining\"\"\"\n        # track_instances.cache_logits = torch.zeros(\n        #     (B, len_track_instances, self.num_classes), dtype=torch.float, device=device)\n        # track_instances.cache_bboxes = torch.zeros(\n        #     (B, len_track_instances, 10), dtype=torch.float, device=device)\n        # track_instances.cache_scores = torch.zeros(\n        #     (B, len_track_instances,), dtype=torch.float, device=device)\n        # track_instances.cache_reference_points = reference_points.clone()\n        # track_instances.cache_query_pos = query_pos.clone()\n        # if self.tracking:\n        #     track_instances.cache_query_feats = self.query_feat_embedding.weight.clone()[None].repeat(B, 1, 1)\n        # else:\n        #     track_instances.cache_query_feats = torch.zeros_like(query_pos)\n        # track_instances.cache_motion_predictions = torch.zeros_like(track_instances.motion_predictions)\n        # \"\"\"History Reasoning\"\"\"\n        # # embeddings\n        track_instances.hist_query_feats = torch.zeros(\n            (B, len_track_instances, self.hist_len, self.embed_dims), dtype=torch.float32, device=device)\n        # # padding 
mask, follow MultiHeadAttention, 1 indicates padded\n        # track_instances.hist_padding_masks = torch.ones(\n        #     (B, len_track_instances, self.hist_len), dtype=torch.bool, device=device)\n        # # positions, global coord\n        track_instances.hist_xyz = torch.zeros(\n            (B, len_track_instances, self.hist_len, 3), dtype=torch.float, device=device)\n        # # positional embeds\n        # track_instances.hist_position_embeds = torch.zeros(\n        #     (B, len_track_instances, self.hist_len, self.embed_dims), dtype=torch.float32, device=device)\n        # # bboxes\n        track_instances.hist_velo = torch.zeros(\n           (B, len_track_instances, self.hist_len, 2), dtype=torch.float, device=device)\n        \n\n        track_instances.hist_mask = torch.zeros(\n           (B, len_track_instances, self.hist_len), dtype=torch.float, device=device)\n\n        # # logits\n        # track_instances.hist_logits = torch.zeros(\n        #     (B, len_track_instances, self.hist_len, self.num_classes), dtype=torch.float, device=device)\n        # # scores\n        # track_instances.hist_scores = torch.zeros(\n        #     (B, len_track_instances, self.hist_len), dtype=torch.float, device=device)\n\n        # \"\"\"Future Reasoning\"\"\"\n        # # embeddings\n        # track_instances.fut_embeds = torch.zeros(\n        #     (B, len_track_instances, self.fut_len, self.embed_dims), dtype=torch.float32, device=device)\n        # # padding mask, follow MultiHeadAttention, 1 indicates padded\n        # track_instances.fut_padding_masks = torch.ones(\n        #     (B, len_track_instances, self.fut_len), dtype=torch.bool, device=device)\n        # # positions\n        # track_instances.fut_xyz = torch.zeros(\n        #     (B, len_track_instances, self.fut_len, 3), dtype=torch.float, device=device)\n        # # positional embeds\n        # track_instances.fut_position_embeds = torch.zeros(\n        #     (B, len_track_instances, self.fut_len, self.embed_dims), dtype=torch.float32, device=device)\n        # # bboxes\n        # track_instances.fut_bboxes = torch.zeros(\n        #     (B, len_track_instances, self.fut_len, 10), dtype=torch.float, device=device)\n        # # logits\n        # track_instances.fut_logits = torch.zeros(\n        #     (B, len_track_instances, self.fut_len, self.num_classes), dtype=torch.float, device=device)\n        # # scores\n        # track_instances.fut_scores = torch.zeros(\n        #     (B, len_track_instances, self.fut_len), dtype=torch.float, device=device)\n        return track_instances\n\n    def instance_temporal_alignment(self):\n        B = self.track_instances.query_pos.size(0)\n        temp_history_track_instances = self.history_track_instances.clone()\n        temp_reference_points = (temp_history_track_instances.reference_points - self.pc_range[:3]) / (self.pc_range[3:6] - self.pc_range[0:3])\n\n        temp_history_track_instances.query_pos = self.query_embedding(pos2posemb3d(temp_reference_points)) \n        rec_ego_pose = torch.eye(4, device= self.track_instances.query_pos.device).unsqueeze(0).unsqueeze(0).repeat(B,  self.track_instances.query_pos.size(1), 1, 1)\n        tmp_ego_pose = torch.eye(4, device= self.track_instances.query_pos.device).unsqueeze(0).unsqueeze(0).repeat(B,  temp_history_track_instances.query_pos.size(1), 1, 1)\n        if self.with_ego_pos:\n            \"current ego pose\"\n            rec_ego_motion = torch.cat([torch.zeros_like(self.track_instances.reference_points[...,:3]), rec_ego_pose[..., 
:3, :].flatten(-2)], dim=-1)\n            rec_ego_motion = nerf_positional_encoding(rec_ego_motion)\n            self.track_instances.query_pos = self.ego_pose_pe(self.track_instances.query_pos, rec_ego_motion)\n            self.track_instances.query_feats = self.ego_pose_memory(self.track_instances.query_feats, rec_ego_motion)\n            \n            \"memory ego pose\"\n            memory_ego_motion = torch.cat([ temp_history_track_instances.velo, temp_history_track_instances.timestamp,  tmp_ego_pose[..., :3, :].flatten(-2)], dim=-1).float()\n            memory_ego_motion = nerf_positional_encoding(memory_ego_motion)\n\n            temp_history_track_instances.query_pos = self.ego_pose_pe(temp_history_track_instances.query_pos, memory_ego_motion)\n            temp_history_track_instances.query_feats = self.ego_pose_memory(temp_history_track_instances.query_feats, memory_ego_motion)\n\n        self.track_instances.query_pos += self.time_embedding(pos2posemb1d(torch.zeros_like(self.track_instances.reference_points[...,:1])))\n        temp_history_track_instances.query_pos += self.time_embedding(pos2posemb1d(temp_history_track_instances.timestamp).float())\n        \n        if self.num_propagated > 0:\n            reference_points = torch.cat([self.track_instances.reference_points, temp_reference_points[:, :self.num_propagated]], dim=1)\n            self.track_instances = Instances.cat([self.track_instances, temp_history_track_instances[:, :self.num_propagated]], dim=1)\n            temp_history_track_instances = temp_history_track_instances[:, self.num_propagated:]\n            temp_reference_points = temp_reference_points[:, self.num_propagated:]\n            rec_ego_pose = torch.eye(4, device=self.track_instances.query_pos.device).unsqueeze(0).unsqueeze(0).repeat(B, self.track_instances.query_pos.shape[1], 1, 1)\n            \n        return reference_points, temp_history_track_instances, temp_reference_points, rec_ego_pose\n\n    def pre_update_instances(self, data):\n        x = 1-data['start_of_sequence'] # original prev_exist, so we need do `not`\n        B = x.size(0)\n        self.track_instances = self.generate_empty_instance(B, init_memory_instances=False)\n        if self.history_track_instances is None:\n            self.history_track_instances = self.generate_empty_instance(B, init_memory_instances=True)\n        else:\n            self.history_track_instances.timestamp += data['timestamp'].unsqueeze(-1).unsqueeze(-1)\n            self.history_track_instances.ego_pose = data['ego_pose_inv'].unsqueeze(1) @ self.history_track_instances.ego_pose\n            self.history_track_instances.reference_points = transform_reference_points(self.history_track_instances.reference_points, data['ego_pose_inv'], reverse=False)\n            \n            ## hist\n            self.history_track_instances.hist_xyz = transform_reference_points(self.history_track_instances.hist_xyz, data['ego_pose_inv'], reverse=False)\n            self.history_track_instances.hist_velo = transform_velo(self.history_track_instances.hist_velo,  data['ego_pose_inv'], reverse=False)\n            # hist\n\n            self.history_track_instances = self.history_track_instances[:, :self.memory_len]\n            \n            if data['start_of_sequence'].any():\n                self.history_track_instances.timestamp = memory_refresh(self.history_track_instances.timestamp, x)\n                self.history_track_instances.reference_points = memory_refresh(self.history_track_instances.reference_points, x)\n             
   self.history_track_instances.query_feats = memory_refresh(self.history_track_instances.query_feats, x)\n                self.history_track_instances.ego_pose = memory_refresh(self.history_track_instances.ego_pose, x)\n                self.history_track_instances.velo = memory_refresh(self.history_track_instances.velo, x)\n                self.history_track_instances.scores = memory_refresh(self.history_track_instances.scores, x)\n\n                ## hist\n                self.history_track_instances.hist_xyz = memory_refresh(self.history_track_instances.hist_xyz, x)\n                self.history_track_instances.hist_velo = memory_refresh(self.history_track_instances.hist_velo, x)\n                self.history_track_instances.hist_mask = memory_refresh(self.history_track_instances.hist_mask, x)\n                ##\n                device = self.reference_points.weight.device\n              \n                self.history_track_instances.matched_gt_idxes = (memory_refresh(self.history_track_instances.matched_gt_idxes, x) + (1 - x).view(B, 1) *  torch.full(\n                    (B, self.memory_len,), -1, dtype=torch.long, device=device)).to(torch.long)\n                self.history_track_instances.obj_idxes = (memory_refresh(self.history_track_instances.obj_idxes, x) + (1 - x).view(B, 1) *  torch.full(\n                    (B, self.memory_len,), -1, dtype=torch.long, device=device)).to(torch.long)\n        # for the first frame, padding pseudo_reference_points (non-learnable)\n        if self.num_propagated > 0:\n            pseudo_reference_points = self.pseudo_reference_points.weight * (self.pc_range[3:6] - self.pc_range[0:3]) + self.pc_range[0:3]\n            self.history_track_instances.reference_points[:, :self.num_propagated] = self.history_track_instances.reference_points[:, :self.num_propagated] + (1 - x).view(B, 1, 1) * pseudo_reference_points\n            self.history_track_instances.ego_pose[:, :self.num_propagated] = self.history_track_instances.ego_pose[:, :self.num_propagated] + (1 - x).view(B, 1, 1, 1) * torch.eye(4, device=x.device)\n\n    def post_update_instances(self, data, rec_ego_pose, all_cls_scores, all_bbox_preds, outs_dec, mask_dict):\n\n        if self.training and mask_dict and mask_dict['pad_size'] > 0:\n            rec_reference_points = all_bbox_preds[:, :, mask_dict['pad_size']:, :3][self.layer_index]\n            rec_velo = all_bbox_preds[:, :, mask_dict['pad_size']:, -2:][self.layer_index]\n            rec_memory = outs_dec[:, :, mask_dict['pad_size']:, :][self.layer_index]\n            rec_score = all_cls_scores[:, :, mask_dict['pad_size']:, :][self.layer_index].sigmoid().topk(1, dim=-1).values[..., 0:1]\n            rec_timestamp = torch.zeros_like(rec_score, dtype=torch.float64)\n            rec_bboxes = all_bbox_preds[:, :, mask_dict['pad_size']:, :][self.layer_index]\n        else:\n            rec_reference_points = all_bbox_preds[..., :3][self.layer_index]\n            rec_velo = all_bbox_preds[..., -2:][self.layer_index]\n            rec_memory = outs_dec[self.layer_index]\n            rec_score = all_cls_scores[self.layer_index].sigmoid().topk(1, dim=-1).values[..., 0:1]\n            rec_timestamp = torch.zeros_like(rec_score, dtype=torch.float64)\n            rec_bboxes = all_bbox_preds[self.layer_index]\n        \n        # topk proposals\n        self.track_instances.timestamp = rec_timestamp\n        self.track_instances.query_feats = rec_memory\n        self.track_instances.ego_pose = rec_ego_pose\n        self.track_instances.velo = rec_velo 
\n        self.track_instances.reference_points = rec_reference_points\n        self.track_instances.scores = rec_score\n        self.track_instances.bboxes = rec_bboxes\n\n        ## update hist\n        self.track_instances.hist_xyz =  torch.cat([self.track_instances.hist_xyz[:, :, 1:], rec_reference_points.unsqueeze(-2)], -2)\n        self.track_instances.hist_velo =  torch.cat([self.track_instances.hist_velo[:, :, 1:], rec_velo.unsqueeze(-2)], -2)\n        self.track_instances.hist_query_feats =  torch.cat([self.track_instances.hist_query_feats[:, :, 1:], rec_memory.unsqueeze(-2)], -2)\n        self.track_instances.hist_mask[..., -1] = 1\n\n\n    def post_merge_instances(self, data, kept_indicator=0): \n        \"\"\"During training, we keep all active instances, so the merging step should happen after the assignment.\n        \"\"\"\n        active_instances = (self.track_instances.matched_gt_idxes>=kept_indicator).nonzero()\n\n        B = len(self.track_instances)\n        topk_indexes_list = []\n        for i in range(B):\n\n            active_idxes_i = active_instances[active_instances[:, 0] == i][:, 1]\n            scores = self.track_instances.scores[i:i+1].clone()\n            scores[:, active_idxes_i] = -1\n            \n            _, topk_indexes = torch.topk(scores, self.topk_proposals - min(len(active_idxes_i), self.topk_proposals), dim=1)\n\n            self.track_instances.obj_idxes[i, topk_indexes[0, :, 0]] = -1\n            topk_indexes_list.append(torch.cat([active_idxes_i[None, :, None], topk_indexes], 1))\n\n        topk_indexes = torch.cat(topk_indexes_list)\n        # valid_key_set = ['reference_points', 'query_pos', 'query_feats', 'timestamp', 'velo', 'ego_pose', 'obj_idxes', 'matched_gt_idxes', 'disappear_time']\n        topk_instances = self.track_instances.instances_topk_gather(topk_indexes, valid_key_set=None)\n        re_track_instances = Instances.detach(topk_instances)\n        self.history_track_instances = Instances.cat([re_track_instances, self.history_track_instances], dim=1)\n        # self.memory_reference_point_copy = self.memory_reference_point.clone()\n\n        self.history_track_instances.reference_points = transform_reference_points(self.history_track_instances.reference_points, data['ego_pose'], reverse=False)\n        self.history_track_instances.timestamp -= data['timestamp'].unsqueeze(-1).unsqueeze(-1)\n        self.history_track_instances.ego_pose = data['ego_pose'].unsqueeze(1) @ self.history_track_instances.ego_pose\n    \n        self.history_track_instances.hist_xyz = transform_reference_points(self.history_track_instances.hist_xyz, data['ego_pose'], reverse=False)\n        self.history_track_instances.hist_velo = transform_velo(self.history_track_instances.hist_velo, data['ego_pose'], reverse=False)\n        return topk_instances\n\n    def forward(self, input_dict, img_metas,  gt_bboxes_3d=None, gt_labels_3d=None, debug_info=None):\n        \"\"\"Forward function.\n        Args:\n            input_dict (dict): Contains 'img_bev_feat', the BEV features\n                from the upstream network.\n            img_metas (list[dict]): Meta information of each sample.\n        Returns:\n            all_cls_scores (Tensor): Outputs from the classification head, \\\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                cls_out_channels should include background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\n                head with normalized coordinate format (cx, cy, w, l, cz, h, theta, vx, vy). 
\\\n                Shape [nb_dec, bs, num_query, 9].\n        \"\"\"\n\n        start_of_sequence = torch.FloatTensor([\n            single_img_metas['start_of_sequence'] \n            for single_img_metas in img_metas]).to(input_dict['img_bev_feat'][0].device)\n\n        timestamp = torch.FloatTensor([\n            single_img_metas['timestamp'] \n            for single_img_metas in img_metas]).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose_inv = torch.stack([\n            single_img_metas['ego_pose_inv'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        ego_pose = torch.stack([\n            single_img_metas['ego_pose'] \n            for single_img_metas in img_metas], 0).to(input_dict['img_bev_feat'][0].device)\n\n        data = dict(\n            start_of_sequence = start_of_sequence,\n            timestamp = timestamp,\n            ego_pose_inv = ego_pose_inv,\n            ego_pose = ego_pose,\n        )\n\n        if input_dict['img_bev_feat'][0].dim() == 5:\n            mlvl_feats = [level.mean(-1) for level in input_dict['img_bev_feat']]\n        else:\n            mlvl_feats = input_dict['img_bev_feat']\n\n        # self.pre_update_memory(data)\n        self.pre_update_instances(data)\n        # mlvl_feats = data['img_feats']\n        B = mlvl_feats[0].size(0)\n\n\n        # reference_points = self.reference_points.weight\n        dtype = self.track_instances.reference_points.dtype\n\n        feat_flatten = []\n        spatial_flatten = []\n        for i in range(len(mlvl_feats)):\n            B, C, H, W = mlvl_feats[i].shape\n            mlvl_feat = mlvl_feats[i].reshape(B, C, -1).transpose(1, 2)\n            # mlvl_feat = self.spatial_alignment(mlvl_feat, mln_input)\n            feat_flatten.append(mlvl_feat.to(dtype))\n            spatial_flatten.append((H, W))\n        feat_flatten = torch.cat(feat_flatten, dim=1)\n        spatial_flatten = torch.as_tensor(spatial_flatten, dtype=torch.long, device=mlvl_feats[0].device)\n        level_start_index = torch.cat((spatial_flatten.new_zeros((1, )), spatial_flatten.prod(1).cumsum(0)[:-1]))\n        # reference_points, attn_mask, mask_dict = self.prepare_for_dn(B, reference_points, img_metas,  gt_bboxes_3d, gt_labels_3d)\n        attn_mask, mask_dict = None, None\n        # prepare for the tgt and query_pos using mln.\n        reference_points, temp_history_track_instances, temp_reference_points, rec_ego_pose = self.instance_temporal_alignment()\n\n        tgt = self.track_instances.query_feats\n        query_pos = self.track_instances.query_pos\n        # reference_points = self.track_instances.reference_points\n        temp_pos = temp_history_track_instances.query_pos\n        temp_memory = temp_history_track_instances.query_feats\n        \n\n        outs_dec, intermediate_reference_points = self.transformer(tgt, query_pos, feat_flatten, spatial_flatten, level_start_index, temp_memory, \n                                    temp_pos, attn_mask, reference_points, self.pc_range, data, img_metas, reg_branches=self.reg_branches,\n                                    return_intermediate_pts=True,\n                                    query_embedding=self.query_embedding,\n                                    temp_reference_points=temp_reference_points)\n\n        outs_dec = torch.nan_to_num(outs_dec)\n        outputs_classes = []\n        outputs_coords = []\n        for lvl in range(outs_dec.shape[0]):\n            reference = 
inverse_sigmoid(intermediate_reference_points[lvl])\n            assert reference.shape[-1] == 3\n            outputs_class = self.cls_branches[lvl](outs_dec[lvl])\n            tmp = self.reg_branches[lvl](outs_dec[lvl])\n            tmp[..., 0:3] += reference[..., 0:3]\n            tmp[..., 0:3] = tmp[..., 0:3].sigmoid()\n            outputs_coord = tmp\n            outputs_classes.append(outputs_class)\n            outputs_coords.append(outputs_coord)\n\n        all_cls_scores = torch.stack(outputs_classes)\n        all_bbox_preds = torch.stack(outputs_coords)\n        all_bbox_preds[..., 0:3] = (all_bbox_preds[..., 0:3] * (self.pc_range[3:6] - self.pc_range[0:3]) + self.pc_range[0:3])\n        \n        # update the memory bank\n        self.post_update_instances(data, rec_ego_pose, all_cls_scores, all_bbox_preds, outs_dec, mask_dict)\n\n        if mask_dict and mask_dict['pad_size'] > 0:\n            assert False\n            output_known_class = all_cls_scores[:, :, :mask_dict['pad_size'], :]\n            output_known_coord = all_bbox_preds[:, :, :mask_dict['pad_size'], :]\n            outputs_class = all_cls_scores[:, :, mask_dict['pad_size']:, :]\n            outputs_coord = all_bbox_preds[:, :, mask_dict['pad_size']:, :]\n            mask_dict['output_known_lbs_bboxes']=(output_known_class, output_known_coord)\n            outs = {\n                'all_cls_scores': outputs_class,\n                'all_bbox_preds': outputs_coord,\n                'dn_mask_dict':mask_dict,\n            }\n        else:\n            outs = {\n                'agent_queries': self.track_instances.query_feats,\n                'all_cls_scores': all_cls_scores,\n                'all_bbox_preds': all_bbox_preds,\n                'dn_mask_dict':None,\n                'track_instances': self.track_instances,\n                'data': data\n            }\n\n        return outs\n\n   \n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self,\n             gt_bboxes_list,\n             gt_labels_list,\n             preds_dicts,\n             img_metas=None,\n             gt_bboxes_ignore=None):\n        \"\"\"\"Loss function.\n        Args:\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indexes for each\n                image with shape (num_gts, ).\n            preds_dicts:\n                all_cls_scores (Tensor): Classification score of all\n                    decoder layers, has shape\n                    [nb_dec, bs, num_query, cls_out_channels].\n                all_bbox_preds (Tensor): Sigmoid regression\n                    outputs of all decode layers. Each is a 4D-tensor with\n                    normalized coordinate format (cx, cy, w, h) and shape\n                    [nb_dec, bs, num_query, 4].\n                enc_cls_scores (Tensor): Classification scores of\n                    points on encode feature map , has shape\n                    (N, h*w, num_classes). Only be passed when as_two_stage is\n                    True, otherwise is None.\n                enc_bbox_preds (Tensor): Regression results of each points\n                    on the encode feature map, has shape (N, h*w, 4). Only be\n                    passed when as_two_stage is True, otherwise is None.\n            gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                which can be ignored for each image. 
Default None.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n\n        instance_inds = [ single_img_metas['instance_inds'] for single_img_metas in img_metas]\n        loss = self.criterion.loss_single_frame(0,\n             gt_bboxes_list,\n             gt_labels_list,\n             instance_inds,\n             preds_dicts,\n             gt_bboxes_ignore)\n        topk_instances = self.post_merge_instances(preds_dicts['data'])\n        return loss, topk_instances\n    \n    def get_targets(self):\n        pass\n    \n    def forward_tracking(self, input_dict, img_metas):\n        pred_dicts = self.forward(input_dict, img_metas)\n        # prev_active_track_instances = self.runtime_tracker.track_instances\n        track_instances= pred_dicts['track_instances']\n\n        # assign ids\n        # active_mask = (track_instances.scores > self.runtime_tracker.threshold)\n        B =  len(track_instances)\n        appear_mask = (track_instances.obj_idxes< 0) & (track_instances.scores[..., 0] > self.runtime_tracker.threshold)        \n        kept_mask = (track_instances.obj_idxes>=0) & (track_instances.scores[..., 0] > self.runtime_tracker.threshold)\n        disappear_mask = (track_instances.obj_idxes>=0) & (track_instances.scores[..., 0] <= self.runtime_tracker.threshold)\n        non_mask =  (track_instances.obj_idxes<0) & (track_instances.scores[..., 0] <= self.runtime_tracker.threshold)\n        track_instances.matched_gt_idxes[appear_mask|kept_mask] = 1\n        track_instances.matched_gt_idxes[disappear_mask] -= 1\n        track_instances.matched_gt_idxes[non_mask] = -10000\n        track_instances.obj_idxes[appear_mask] = torch.arange(self.runtime_tracker.current_id, self.runtime_tracker.current_id+appear_mask.sum(), device=appear_mask.device)[None]\n        self.runtime_tracker.current_id += appear_mask.sum()\n        \n        pred_dicts['track_instances'] = track_instances.clone()\n        pred_dicts['track_instances'].scores = pred_dicts['track_instances'].scores.squeeze(-1)\n        score_mask = (pred_dicts['track_instances'].scores > self.runtime_tracker.output_threshold)\n        pred_dicts['all_masks'] = score_mask.clone()\n\n        topk_instances =  self.post_merge_instances(pred_dicts['data'], kept_indicator=0)\n\n\n        return pred_dicts, topk_instances\n\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def get_bboxes(self, preds_dicts, img_metas, rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        \n        preds_dicts = self.bbox_coder.decode(preds_dicts, layer_index=self.layer_index)\n        num_samples = len(preds_dicts)\n       \n        ret_list = []\n        for i in range(num_samples):\n            preds = preds_dicts[i]\n            bboxes = preds['bboxes']\n            bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5\n            bboxes = img_metas[i]['box_type_3d'](bboxes, bboxes.size(-1))\n            scores = preds['scores']\n            labels = preds['labels']\n            bbox_results = bbox3d2result(bboxes, scores, labels)\n            for key in ['track_scores', 'obj_idxes']:\n                bbox_results[key] = preds[key].cpu()\n            ret_list.append(bbox_results)\n        return ret_list\n\nclass 
MLN(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=256, use_ln=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.use_ln = use_ln\n\n        self.reduce = nn.Sequential(\n            nn.Linear(c_dim, f_dim),\n            nn.ReLU(),\n        )\n        self.gamma = nn.Linear(f_dim, f_dim)\n        self.beta = nn.Linear(f_dim, f_dim)\n        if self.use_ln:\n            self.ln = nn.LayerNorm(f_dim, elementwise_affine=False)\n        self.init_weight()\n\n    def init_weight(self):\n        nn.init.zeros_(self.gamma.weight)\n        nn.init.zeros_(self.beta.weight)\n        nn.init.ones_(self.gamma.bias)\n        nn.init.zeros_(self.beta.bias)\n\n    def forward(self, x, c):\n        if self.use_ln:\n            x = self.ln(x)\n        c = self.reduce(c)\n        gamma = self.gamma(c)\n        beta = self.beta(c)\n        out = gamma * x + beta\n\n        return out"
  },
  {
    "path": "mmdet3d/models/fbbev/track_head/utils.py",
    "content": "import torch\nimport copy\nimport math\nimport torch\nimport torch.nn as nn \nimport numpy as np\nfrom mmcv.cnn import bias_init_with_prob, xavier_init\n\n\nclass StreamTensorMemory(object):\n    def __init__(self, batch_size):\n        self.train_bs = batch_size\n        self.training = True\n        self.bs = self.train_bs\n\n        self.train_memory_list = [None for i in range(self.bs)]\n        self.train_img_metas_memory = [None for i in range(self.bs)]\n\n        self.test_memory_list = [None] # bs = 1 when testing\n        self.test_img_metas_memory = [None]\n    \n    @property\n    def memory_list(self):\n        if self.training:\n            return self.train_memory_list\n        else:\n            return self.test_memory_list\n    \n    @property\n    def img_metas_memory(self):\n        if self.training:\n            return self.train_img_metas_memory\n        else:\n            return self.test_img_metas_memory\n\n    def update(self, memory, img_metas):\n        for i in range(self.bs):\n            self.memory_list[i] = memory[i].clone().detach()\n            self.img_metas_memory[i] = copy.deepcopy(img_metas[i])\n        \n    def reset_single(self, idx):\n        self.memory_list[idx] = None\n        self.img_metas_memory[idx] = None\n\n    def get(self, img_metas):\n        '''\n        img_metas: list[img_metas]\n        '''\n\n        tensor_list = []\n        img_metas_list = []\n        is_first_frame_list = []\n        \n        for i in range(self.bs):\n            if not self.img_metas_memory[i]:\n                is_first_frame = True\n            else:\n                is_first_frame = (img_metas[i]['scene_name'] != self.img_metas_memory[i]['scene_name'])\n\n            if is_first_frame:\n                self.reset_single(i)\n\n            tensor_list.append(self.memory_list[i])\n            img_metas_list.append(self.img_metas_memory[i])\n            is_first_frame_list.append(is_first_frame)\n\n        result = {\n            'tensor': tensor_list,\n            'img_metas': img_metas_list,\n            'is_first_frame': is_first_frame_list,\n        }\n        \n        return result\n    \n    def train(self, mode=True):\n        self.training = mode\n        if mode:\n            self.bs = self.train_bs\n        else:\n            self.bs = 1\n\n    def eval(self):\n        self.train(False)\n\n\n\nclass MotionMLP(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=512, identity=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.identity = identity\n\n        self.fc = nn.Sequential(\n            nn.Linear(c_dim + f_dim, 2*f_dim),\n            nn.LayerNorm(2*f_dim),\n            nn.ReLU(),\n            nn.Linear(2*f_dim, f_dim)\n        )\n        self.init_weights()\n\n    def init_weights(self):\n        for m in self.fc:\n            for param in m.parameters():\n                if param.dim() > 1:\n                    if self.identity:\n                        nn.init.zeros_(param)\n                    else:\n                        nn.init.xavier_uniform_(param)\n\n    def forward(self, x, c):\n        xc = torch.cat([x, c], dim=-1)\n        out = self.fc(xc)\n\n        if self.identity:\n            out = out + x\n        \n        return out"
  },
  {
    "path": "mmdet3d/models/fbbev/utils/__init__.py",
    "content": "from .bricks import save_tensor, run_time\nfrom .wechat_logger import MyWechatLoggerHook\nfrom .draw_bbox import *\nfrom .eval_hook import CustomDistEvalHook\nfrom .timer_cp import TimerCP"
  },
  {
    "path": "mmdet3d/models/fbbev/utils/bricks.py",
    "content": "import torch\nfrom torchvision.utils import make_grid\nimport torchvision\nimport matplotlib.pyplot as plt\nimport cv2\n\nfrom array import array\nfrom collections.abc import Iterable, Mapping\nfrom sys import getsizeof\nfrom types import GeneratorType\n\ndef compute_allocation(obj) -> int:\n    my_ids = set([id(obj)])  # store the ids of previously seen objects\n    to_compute = [obj]\n    allocation_size = 0\n    container_allocation = 0  # return the memory spent in containers like list or dictionaryes \n    while len(to_compute) > 0:\n        obj_to_check = to_compute.pop()\n        allocation_size += getsizeof(obj_to_check)\n        if type(obj_to_check) == str: # string just return the actual size\n            continue\n        if type(obj_to_check) == array:  # array just return the actual size \n            continue\n            \n        # if we have other object that only return the actual size, use the same logic as above\n        elif isinstance(obj_to_check, GeneratorType): # generator objet takes little memory\n            continue\n        elif isinstance(obj_to_check, Mapping): # for dic need to count the keys and values\n            container_allocation += getsizeof(obj_to_check)\n            for ikey, ivalue in obj_to_check.items():\n                if id(ikey) not in my_ids:\n                    my_ids.add(id(ikey))\n                    to_compute.append(ikey)\n                if id(ivalue) not in my_ids:\n                    my_ids.add(id(ivalue))\n                    to_compute.append(ivalue)\n        elif isinstance(obj_to_check, Iterable): # for iterable like object ,use the same logic above \n            container_allocation += getsizeof(obj_to_check)\n            for inner in obj_to_check:\n                if id(inner) not in my_ids:\n                    my_ids.add(id(inner))\n                    to_compute.append(inner)\n    return allocation_size, allocation_size - container_allocation\ndef convert_color(img_path):\n    plt.figure()\n    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)\n    plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))\n    plt.close()\n\n\ndef save_tensor(tensor, path, pad_value=254.0,normalize=False):\n    print('save_tensor', path)\n    tensor = tensor.to(torch.float).detach().cpu()\n    max_ = tensor.flatten(1).max(-1).values[:, None, None]\n    min_ = tensor.flatten(1).min(-1).values[:, None, None]\n    tensor = (tensor-min_)/(max_-min_)\n    if tensor.type() == 'torch.BoolTensor':\n        tensor = tensor*255\n    if len(tensor.shape) == 3:\n        tensor = tensor.unsqueeze(1)\n    tensor = make_grid(tensor, pad_value=pad_value, normalize=normalize).permute(1, 2, 0).numpy().copy()\n    torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)\n    convert_color(path)\n\n\nimport functools\nimport time\nfrom collections import defaultdict\ntime_maps = defaultdict(lambda :0.)\ncount_maps = defaultdict(lambda :0.)\ndef run_time(name):\n    def middle(fn):\n        def wrapper(*args, **kwargs):\n            torch.cuda.synchronize()\n            start_time = time.perf_counter()\n            res = fn(*args, **kwargs)\n            torch.cuda.synchronize()\n            elapsed = time.perf_counter() - start_time\n            time_maps['%s : %s'%(name, fn.__name__) ] += elapsed\n            count_maps['%s : %s'%(name, fn.__name__) ] +=1\n            print(\"%s : %s takes up %f \"% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] ))\n            return 
res\n        return wrapper\n    return middle"
  },
  {
    "path": "mmdet3d/models/fbbev/utils/draw_bbox.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\n\n## copy-paste from mmdet3d. Used to debug\nimport mmcv\nimport numpy as np\nfrom mmdet3d.core.visualizer.image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img)\nimport cv2\nimport torch\nimport copy\nimport os.path as osp\nfrom mmdet3d.core.bbox.iou_calculators import BboxOverlaps3D\nfrom IPython import embed\n\nc_iou = BboxOverlaps3D(coordinate='lidar')\n\n\ndef plot_rect3d_on_img(img,\n                       num_rects,\n                       rect_corners,\n                       color=(0, 255, 0),\n                       thickness=1,\n                       img_metas=None,\n                       scores=None,\n                       types=None\n                       ):\n    \"\"\"Plot the boundary lines of 3D rectangular on 2D images.\n    Args:\n        img (numpy.array): The numpy array of image.\n        num_rects (int): Number of 3D rectangulars.\n        rect_corners (numpy.array): Coordinates of the corners of 3D\n            rectangulars. Should be in the shape of [num_rect, 8, 2].\n        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. Default: 1.\n    \"\"\"\n\n    line_indices = [(0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),\n                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7), (0, 5), (1, 4)]\n    for i in range(num_rects):\n        corners = rect_corners[i].astype(np.int)\n        try:\n            color = [(255, 0, 0),  (61, 102, 255), (241, 101, 72), (125, 125, 0), (61, 102, 255)][int(types[i])]\n        except:\n            color = (61, 102, 255)\n        back_mid = ((corners[0, 0] + corners[3, 0])//2, (corners[0, 1] + corners[3, 1])//2)\n        front_mid = ((corners[7, 0] + corners[4, 0]) // 2, (corners[7, 1] + corners[4, 1]) // 2)\n        bottom_center = ((front_mid[0] + back_mid[0])//2, (front_mid[1] + back_mid[1])//2)\n        try:\n            cv2.line(img, front_mid, bottom_center, color, thickness+1, cv2.LINE_AA)\n        except:\n            pass\n        for j, (start, end) in enumerate(line_indices):\n            try:\n                if j in [12, 13]:\n                    # front_thickness = thickness\n                    # cv2.line(img, (corners[start, 0], corners[start, 1]),\n                    #          (corners[end, 0], corners[end, 1]), (0, 160, 0), front_thickness,\n                    #          cv2.LINE_AA)\n                    pass\n                else:\n                    cv2.line(img, (corners[start, 0], corners[start, 1]),\n                             (corners[end, 0], corners[end, 1]), color, thickness+1,\n                             cv2.LINE_AA)\n            except:\n                pass\n            \n\n            # for p in range(8):\n            #     try:\n            #         cv2.putText(img, str(p), corners[p,:2], cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 2)\n            #     except:\n            #         pass\n            # if img_metas != 0 and j == 0:\n            #     text = img_metas[i]\n            #     try:\n            #         cv2.putText(img, '%.1f %.1f %.1f' % (text[0], text[1], text[2]), (corners[start, 0], corners[start, 1]),\n            #                cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 2)\n            #     
except:\n            #         pass\n            #    print('bug in plot_rect3d_on_img')\n        # print(str(scores[i])[1:4])\n        try:\n            if scores[i] >= 1.0:\n                scores[i] = str(01.0)\n            # cv2.putText(img, str(scores[i])[1:4], (corners[6, 0], corners[6, 1]), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 2)\n        except:\n            pass\n    return img.astype(np.uint8)\n\n\ndef draw_lidar_bbox3d_on_img(bboxes3d,\n                             raw_img,\n                             lidar2img_rt,\n                             img_metas,\n                             color=(0, 255, 0),\n                             camera_params=None,\n                             scores=None,\n                             types=None,\n                             thickness=1):\n    \"\"\"Project the 3D bbox on 2D plane and draw on input image.\n    Args:\n        bboxes3d (:obj:`LiDARInstance3DBoxes`):\n            3d bbox in lidar coordinate system to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        img_metas (dict): Useless here.\n        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. Default: 1.\n    \"\"\"\n    img = raw_img.copy()\n    corners_3d = bboxes3d.corners\n    num_bbox = corners_3d.shape[0]\n    if camera_params is None:\n        pts_4d = np.concatenate(\n            [corners_3d.reshape(-1, 3),\n            np.ones((num_bbox * 8, 1))], axis=-1)\n        lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)\n        if isinstance(lidar2img_rt, torch.Tensor):\n            lidar2img_rt = lidar2img_rt.cpu().numpy()\n        pts_2d = pts_4d @ lidar2img_rt.T\n\n        pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)\n        pts_2d[:, 0] /= pts_2d[:, 2]\n        pts_2d[:, 1] /= pts_2d[:, 2]\n        imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)\n    else:\n        rots, trans, intrins, post_rots, post_trans, bda, i = camera_params\n        B = 1\n        N = 6\n        num_frame=rots.size(0)//N\n        extra = [\n            rots.view(B, num_frame, N, 3, 3),\n            trans.view(B, num_frame, N, 3),\n            intrins.view(B, num_frame, N, 3, 3),\n            post_rots.view(B, num_frame, N, 3, 3),\n            post_trans.view(B, num_frame, N, 3)\n        ]\n        extra = [torch.split(t, 1, 1) for t in extra]\n        extra = [[p.squeeze(1) for p in t] for t in extra]\n        rots, trans, intrins, post_rots, post_trans = extra\n        cam_params = [rots[0], trans[0], intrins[0], post_rots[0], post_trans[0]]    \n        rots, trans, intrins, post_rots, post_trans = cam_params\n        \n        reference_points = bboxes3d.corners[None]\n        eps = 1e-5\n        _, ogfH, ogfW = img.shape\n        reference_points = reference_points[None, None].repeat(B, N, 1, 1, 1, 1)\n        reference_points = torch.inverse(bda).view(B, 1, 1, 1, 1, 3,\n                          3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\n        reference_points -= trans.view(B, N, 1, 1, 1, 3)\n        combine = rots.matmul(torch.inverse(intrins)).inverse()\n        reference_points_cam = combine.view(B, N, 1, 1, 1, 3, 3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\n        reference_points_cam = torch.cat([reference_points_cam[..., 0:2] / torch.maximum(\n            reference_points_cam[..., 2:3], 
torch.ones_like(reference_points_cam[..., 2:3])*eps),  reference_points_cam[..., 2:3]], 5\n            )\n        reference_points_cam = post_rots.view(B, N, 1, 1, 1, 3, 3).matmul(reference_points_cam.unsqueeze(-1)).squeeze(-1)\n        reference_points_cam += post_trans.view(B, N, 1, 1, 1, 3) \n        # reference_points_cam[..., 0] /= ogfW\n        # reference_points_cam[..., 1] /= ogfH\n        imgfov_pts_2d = reference_points_cam[0,i,0].cpu().numpy()\n\n\n    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness, img_metas,scores=scores, types=types)\n\n    \ndef show_multi_modality_result(img,\n                               gt_bboxes,\n                               pred_bboxes,\n                               proj_mat,\n                               out_dir,\n                               filename,\n                               box_mode='lidar',\n                               img_metas=None,\n                               show=True,\n                               scores=None,\n                               types=None,\n                               camera_params=None,\n                               gt_bbox_color=(61, 102, 255),\n                               pred_bbox_color=(241, 101, 72)):\n    \"\"\"Convert multi-modality detection results into 2D results.\n    Project the predicted 3D bbox to 2D image plane and visualize them.\n    Args:\n        img (np.ndarray): The numpy array of image in cv2 fashion.\n        gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.\n        pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.\n        proj_mat (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        out_dir (str): Path of output directory.\n        filename (str): Filename of the current frame.\n        box_mode (str): Coordinate system the boxes are in. Should be one of\n           'depth', 'lidar' and 'camera'. Defaults to 'lidar'.\n        img_metas (dict): Used in projecting depth bbox.\n        show (bool): Visualize the results online. Defaults to False.\n        gt_bbox_color (str or tuple(int)): Color of bbox lines.\n           The tuple of color should be in BGR order. Default: (255, 102, 61)\n        pred_bbox_color (str or tuple(int)): Color of bbox lines.\n           The tuple of color should be in BGR order. 
Default: (72, 101, 241)\n    \"\"\"\n    if box_mode == 'depth':\n        draw_bbox = draw_depth_bbox3d_on_img\n    elif box_mode == 'lidar':\n        draw_bbox = draw_lidar_bbox3d_on_img\n    elif box_mode == 'camera':\n        draw_bbox = draw_camera_bbox3d_on_img\n    else:\n        raise NotImplementedError(f'unsupported box mode {box_mode}')\n\n    result_path = osp.join(out_dir, filename)\n    # embed()\n    # exit()\n\n    # mmcv.mkdir_or_exist(out_dir)\n    if scores is not None:\n        keep = scores > 0.3\n        scores = scores[keep]\n        pred_bboxes = pred_bboxes[keep]\n    if show:\n        show_img = img.copy()\n        if gt_bboxes is not None:\n            text = [[bbox[0], bbox[1], bbox[6]] for bbox in gt_bboxes.tensor.cpu().numpy()]\n                #list(c_iou(gt_bboxes.tensor, pred_bboxes.tensor).max(1).values.cpu().numpy())\n            img_metas = text\n\n\n            show_img = draw_bbox(\n            gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color, camera_params=camera_params)\n\n        if pred_bboxes is not None:\n\n                show_img = draw_bbox(\n                    pred_bboxes,\n                    show_img,\n                    proj_mat,\n                    None,\n                    scores=scores,\n                    types=types,\n                    camera_params=camera_params,\n                    color=pred_bbox_color)\n\n                # print('bug in show_multi_modality_result')\n\n\n        mmcv.imwrite(show_img, result_path.replace('.png', '.jpg'))\n\n        # mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)\n    # print()\n    # embed()\n    return\n                   \n"
  },
  {
    "path": "mmdet3d/models/fbbev/utils/eval_hook.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\n\nimport bisect\nimport os.path as osp\nfrom mmdet3d.core.hook.utils import is_parallel\n\nimport mmcv\nimport torch.distributed as dist\nfrom mmcv.runner import DistEvalHook as BaseDistEvalHook\nfrom mmcv.runner import EvalHook as BaseEvalHook\nfrom torch.nn.modules.batchnorm import _BatchNorm\nfrom mmdet.core.evaluation.eval_hooks import DistEvalHook\n\n\ndef _calc_dynamic_intervals(start_interval, dynamic_interval_list):\n    assert mmcv.is_list_of(dynamic_interval_list, tuple)\n\n    dynamic_milestones = [0]\n    dynamic_milestones.extend(\n        [dynamic_interval[0] for dynamic_interval in dynamic_interval_list])\n    dynamic_intervals = [start_interval]\n    dynamic_intervals.extend(\n        [dynamic_interval[1] for dynamic_interval in dynamic_interval_list])\n    return dynamic_milestones, dynamic_intervals\n\n\nclass CustomDistEvalHook(BaseDistEvalHook):\n\n    def __init__(self, *args, dynamic_intervals=None,  work_dir='test', **kwargs):\n        super(CustomDistEvalHook, self).__init__(*args, **kwargs)\n        self.use_dynamic_intervals = dynamic_intervals is not None\n        if self.use_dynamic_intervals:\n            self.dynamic_milestones, self.dynamic_intervals = \\\n                _calc_dynamic_intervals(self.interval, dynamic_intervals)\n        self.work_dir = work_dir\n    def _decide_interval(self, runner):\n        if self.use_dynamic_intervals:\n            progress = runner.epoch if self.by_epoch else runner.iter\n            step = bisect.bisect(self.dynamic_milestones, (progress + 1))\n            # Dynamically modify the evaluation interval\n            self.interval = self.dynamic_intervals[step - 1]\n\n    def before_train_epoch(self, runner):\n        \"\"\"Evaluate the model only at the start of training by epoch.\"\"\"\n        self._decide_interval(runner)\n        super().before_train_epoch(runner)\n\n    def before_train_iter(self, runner):\n        self._decide_interval(runner)\n        super().before_train_iter(runner)\n\n    def _do_evaluate(self, runner):\n        \"\"\"perform evaluation and save ckpt.\"\"\"\n        # Synchronization of BatchNorm's buffer (running_mean\n        # and running_var) is not supported in the DDP of pytorch,\n        # which may cause the inconsistent performance of models in\n        # different ranks, so we broadcast BatchNorm's buffers\n        # of rank 0 to other ranks to avoid this.\n        if is_parallel(runner.model):\n            if  runner.model.module.history_bev is not None:\n                history_bev = runner.model.module.history_bev.clone()\n                history_seq_ids = runner.model.module.history_seq_ids.clone()\n                history_forward_augs = runner.model.module.history_forward_augs.clone()\n                history_sweep_time = runner.model.module.history_sweep_time.clone()\n            else:\n                history_bev = None\n                \n            runner.model.module.history_bev=None\n            runner.ema_model.ema_model.module.history_bev=None\n        else:\n            runner.ema_model.ema_model.history_bev=None\n            runner.model.history_bev = None\n        if self.broadcast_bn_buffer:\n            model = runner.model\n            for name, module in model.named_modules():\n                if 
isinstance(module,\n                              _BatchNorm) and module.track_running_stats:\n                    dist.broadcast(module.running_var, 0)\n                    dist.broadcast(module.running_mean, 0)\n\n        if not self._should_evaluate(runner):\n            return\n\n        tmpdir = self.tmpdir\n        if tmpdir is None:\n            tmpdir = osp.join(runner.work_dir, '.eval_hook')\n\n        from mmdet3d.apis.test import custom_multi_gpu_test # to solve circlur  import\n\n        results = custom_multi_gpu_test(\n            runner.ema_model.ema_model,\n            self.dataloader,\n            tmpdir=tmpdir,\n            gpu_collect=self.gpu_collect)\n        if runner.rank == 0:\n            print('\\n')\n            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)\n\n            key_score = self.evaluate(runner, results)\n\n            if self.save_best:\n                self._save_ckpt(runner, key_score)\n        if is_parallel(runner.model):\n            if history_bev is not None:\n                runner.model.module.history_bev = history_bev.clone()\n                runner.model.module.history_seq_ids = history_seq_ids.clone()\n                runner.model.module.history_forward_augs = history_forward_augs.clone()\n                runner.model.module.history_sweep_time = history_sweep_time.clone()\n            else:\n                runner.model.module.history_bev = None\n            runner.ema_model.ema_model.module.history_bev = None\n        else:\n            runner.model.history_bev = None\n            runner.ema_model.ema_model.history_bev = None\n            # ema_model\n\n    def evaluate(self, runner, results):\n        \"\"\"Evaluate the results.\n\n        Args:\n            runner (:obj:`mmcv.Runner`): The underlined training runner.\n            results (list): Output results.\n        \"\"\"\n        if 'jsonfile_prefix' not in self.eval_kwargs:\n            self.eval_kwargs['jsonfile_prefix'] = osp.join(self.work_dir, 'test')\n        eval_res = self.dataloader.dataset.evaluate(\n            results, logger=runner.logger , **self.eval_kwargs)\n\n        for name, val in eval_res.items():\n            runner.log_buffer.output[name] = val\n        runner.log_buffer.ready = True\n\n        if self.save_best is not None:\n            # If the performance of model is pool, the `eval_res` may be an\n            # empty dict and it will raise exception when `self.save_best` is\n            # not None. More details at\n            # https://github.com/open-mmlab/mmdetection/issues/6265.\n            if not eval_res:\n                warnings.warn(\n                    'Since `eval_res` is an empty dict, the behavior to save '\n                    'the best checkpoint will be skipped in this evaluation.')\n                return None\n\n            if self.key_indicator == 'auto':\n                # infer from eval_results\n                self._init_rule(self.rule, list(eval_res.keys())[0])\n            return eval_res[self.key_indicator]\n\n        return None\n\n"
  },
  {
    "path": "mmdet3d/models/fbbev/utils/grid_mask.py",
    "content": "import torch\nimport torch.nn as nn\nimport numpy as np\nfrom PIL import Image\n\nclass Grid(object):\n    def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):\n        self.use_h = use_h\n        self.use_w = use_w\n        self.rotate = rotate\n        self.offset = offset\n        self.ratio = ratio\n        self.mode=mode\n        self.st_prob = prob\n        self.prob = prob\n\n    def set_prob(self, epoch, max_epoch):\n        self.prob = self.st_prob * epoch / max_epoch\n\n    def __call__(self, img, label):\n        if np.random.rand() > self.prob:\n            return img, label\n        h = img.size(1)\n        w = img.size(2)\n        self.d1 = 2\n        self.d2 = min(h, w)\n        hh = int(1.5*h)\n        ww = int(1.5*w)\n        d = np.random.randint(self.d1, self.d2)\n        if self.ratio == 1:\n            self.l = np.random.randint(1, d)\n        else:\n            self.l = min(max(int(d*self.ratio+0.5),1),d-1)\n        mask = np.ones((hh, ww), np.float32)\n        st_h = np.random.randint(d)\n        st_w = np.random.randint(d)\n        if self.use_h:\n            for i in range(hh//d):\n                s = d*i + st_h\n                t = min(s+self.l, hh)\n                mask[s:t,:] *= 0\n        if self.use_w:\n            for i in range(ww//d):\n                s = d*i + st_w\n                t = min(s+self.l, ww)\n                mask[:,s:t] *= 0\n       \n        r = np.random.randint(self.rotate)\n        mask = Image.fromarray(np.uint8(mask))\n        mask = mask.rotate(r)\n        mask = np.asarray(mask)\n        mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]\n\n        mask = torch.from_numpy(mask).float()\n        if self.mode == 1:\n            mask = 1-mask\n\n        mask = mask.expand_as(img)\n        if self.offset:\n            offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float()\n            offset = (1 - mask) * offset\n            img = img * mask + offset\n        else:\n            img = img * mask \n\n        return img, label\n\n\nclass GridMask(nn.Module):\n    def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):\n        super(GridMask, self).__init__()\n        self.use_h = use_h\n        self.use_w = use_w\n        self.rotate = rotate\n        self.offset = offset\n        self.ratio = ratio\n        self.mode = mode\n        self.st_prob = prob\n        self.prob = prob\n\n    def set_prob(self, epoch, max_epoch):\n        self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5\n\n    def forward(self, x):\n        if np.random.rand() > self.prob or not self.training:\n            return x\n        n,c,h,w = x.size()\n        x = x.view(-1,h,w)\n        hh = int(1.5*h)\n        ww = int(1.5*w)\n        d = np.random.randint(2, h)\n        self.l = min(max(int(d*self.ratio+0.5),1),d-1)\n        mask = np.ones((hh, ww), np.float32)\n        st_h = np.random.randint(d)\n        st_w = np.random.randint(d)\n        if self.use_h:\n            for i in range(hh//d):\n                s = d*i + st_h\n                t = min(s+self.l, hh)\n                mask[s:t,:] *= 0\n        if self.use_w:\n            for i in range(ww//d):\n                s = d*i + st_w\n                t = min(s+self.l, ww)\n                mask[:,s:t] *= 0\n       \n        r = np.random.randint(self.rotate)\n        mask = Image.fromarray(np.uint8(mask))\n        mask = mask.rotate(r)\n        mask = np.asarray(mask)\n        mask = 
mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]\n\n        mask = torch.from_numpy(mask).float().cuda()\n        if self.mode == 1:\n            mask = 1-mask\n        mask = mask.expand_as(x)\n        if self.offset:\n            offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float().cuda()\n            x = x * mask + offset * (1 - mask)\n        else:\n            x = x * mask \n\n        return x.view(n,c,h,w)"
  },
  {
    "path": "mmdet3d/models/fbbev/utils/timer_cp.py",
    "content": "from mmcv.utils import Registry, is_method_overridden\nfrom mmcv.runner.hooks import HOOKS, CheckpointHook, Hook\nfrom mmcv.runner.dist_utils import allreduce_params, master_only\nimport time\n\n@HOOKS.register_module()\nclass TimerCP(CheckpointHook):\n\n    # designed for NVIDIA ORD, each job can only run for 4 hours.\n    # period = 4h = 4 * 3600\n    def __init__(self, period=14400):\n        super().__init__()\n        self.period = period - 180 # 3 mins redundancy\n        self.not_save = True\n\n    def before_run(self, runner):\n        super().before_run(runner)\n        self.start_time = time.time()\n\n    def after_train_epoch(self, runner):\n        pass\n\n    def before_train_iter(self, runner):\n        running_time = (time.time() - self.start_time)\n        if running_time > self.period and self.not_save:\n            runner.logger.info(\n                f'TimerCP: Saving checkpoint at {runner.iter + 1} iterations. Period: '+'%.1fh' % (self.period/3600)\n                )\n            if self.sync_buffer:\n                allreduce_params(runner.model.buffers())\n            self._save_checkpoint(runner)\n            self.not_save = False\n\n    @master_only\n    def _save_checkpoint(self, runner):\n        super()._save_checkpoint(runner)\n  \n\n\n "
  },
  {
    "path": "mmdet3d/models/fbbev/utils/wechat_logger.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\n\nfrom typing import Dict\nimport numpy as np\nimport os.path as osp\nfrom mmcv.runner.dist_utils import master_only\nfrom mmcv.runner.hooks import HOOKS, Hook\nfrom mmcv.runner.hooks.logger.base import LoggerHook\nfrom urllib import request, parse\nimport json\nfrom urllib.error import HTTPError, URLError\nimport socket\n\n\n\n@HOOKS.register_module()\nclass GradChecker(Hook):\n    def after_train_iter(self, runner):\n        max_key = None\n        max_val = -1e5\n        min_key = None\n        min_val = 1e5\n        for key, val in runner.model.named_parameters():\n\n            # if val.grad.max() > max_val:\n            #     max_val = val.grad.max()\n            #     max_key = key\n            # if val.grad.min() < min_val:\n            #     min_val = val.grad.min() \n            #     min_key = key\n            if val.grad == None and val.requires_grad:\n\n            \n                print('WARNNING: {key}\\'s parameters are not be used!!!!'.format(key=key))\n        # print('max grd', max_key, '  ', max_val)\n        # print('min grad', min_key, '  ', min_val)\n\n\n@HOOKS.register_module()\nclass MyWechatLoggerHook(LoggerHook):\n    \"\"\"Class to log metrics to Wechat. Get your latest training results immediately!\n\n    Args:\n        interval (int): Logging interval (every k iterations).\n            Default 10.\n        ignore_last (bool): Ignore the log of last iterations in each epoch\n            if less than `interval`.\n            Default: True.\n        reset_flag (bool): Whether to clear the output buffer after logging.\n            Default: False.\n        by_epoch (bool): Whether EpochBasedRunner is used.\n            Default: True.\n        allowed_subkeys: No need to send all results to your phone. 
Catch the point!\n        miao_code: Get your own code from https://www.showdoc.com.cn/miaotixing/9175237605891603\n    \"\"\"\n\n    def __init__(self,\n                 interval: int = 10,\n                 ignore_last: bool = True,\n                 reset_flag: bool = False,\n                 commit: bool = True,\n                 by_epoch: bool = True,\n                 allowed_subkeys = ['NDS', 'mAP'],\n                 miao_code='xxxxx'):\n        super().__init__(interval, ignore_last, reset_flag, by_epoch)\n\n        self.miao_code = miao_code\n        self.allowed_subkeys = allowed_subkeys\n        self.notification = True\n\n    @master_only\n    def before_run(self, runner) -> None:\n        super().before_run(runner)\n\n    @master_only\n    def get_table_text(self, runner, tags):\n        row_lists = []\n        row_lists.append([runner.meta['exp_name'], ''])\n        if self.by_epoch:\n            row_lists.append(['Epoch', runner.epoch+1])\n        for key in tags.keys():\n            for allowed_subkey in self.allowed_subkeys:\n                if allowed_subkey in key:\n                    row_lists.append([key, tags[key]])\n\n        table_txt = ''\n        for each_row in row_lists:\n            table_txt += '{key}:  {value}\\n'.format(key=each_row[0], value=str(each_row[1]))\n        return table_txt\n\n    @master_only\n    def log(self, runner) -> None:\n        if not self.notification: return\n\n        mode=self.get_mode(runner)\n        tags = self.get_loggable_tags(runner)\n        text = None\n\n        if mode == 'train':\n            if np.isnan(tags['train/loss']) or np.isinf(tags['train/loss']):\n                text = runner.meta['exp_name'] + 'got NaN/INF loss'\n                runner.logger.info('got NaN/INF loss value, we will not send any notification to your phone later')\n                self.notification = False\n        elif mode == 'val':   \n            text =  self.get_table_text(runner, tags)\n        else:\n            assert False, 'what is the running status?'\n\n        self._send(runner, text)\n\n    @master_only\n    def _send(self, runner, text) -> None:\n        if text is None: return \n        \n        page=None\n\n        try:\n            page = request.urlopen(\"http://miaotixing.com/trigger?\" + parse.urlencode({\"id\":self.miao_code, \"text\":text, \"type\":\"json\"}), timeout=5)\n        except HTTPError as error:\n            runner.logger.info('Data not retrieved because %s\\nURL: %s', error, url)\n        except URLError as error:\n            if isinstance(error.reason, socket.timeout):\n                runner.logger.info('MiaoTiXing: socket timed out - URL %s', url)\n            else:\n                runner.logger.info('MiaoTiXing: some other error happened ')\n        else:\n            runner.logger.info('MiaoTiXing: Access successful.')\n        \n        if page is None: return\n        result = page.read()\n        jsonObj = json.loads(result)\n        \n\n    @master_only\n    def after_run(self, runner) -> None:\n        text = runner.meta['exp_name'] + ' Done!!!'\n        self._send(runner, text)\n        pass\n"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/__init__.py",
    "content": "from .forward_projection import *\nfrom .backward_projection import *\n"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/__init__.py",
    "content": "from .backward_projection import BackwardProjection\nfrom .bevformer_utils import *\n\n"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/backward_projection.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \r\n# \r\n# This work is made available under the Nvidia Source Code License-NC. \r\n# To view a copy of this license, visit \r\n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\r\n\r\n\r\n# ---------------------------------------------\r\n#  Modified by Zhiqi Li\r\n# ---------------------------------------------\r\n\r\nimport copy\r\nimport torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\nfrom mmcv.cnn import Linear, bias_init_with_prob\r\nfrom mmcv.utils import TORCH_VERSION, digit_version\r\nfrom mmcv.runner.base_module import BaseModule\r\nfrom mmdet.core import (multi_apply, multi_apply, reduce_mean)\r\nfrom mmdet.models.utils.transformer import inverse_sigmoid\r\nfrom mmdet.models import HEADS\r\nfrom mmdet.models.dense_heads import DETRHead\r\nfrom mmdet3d.core.bbox.coders import build_bbox_coder\r\nfrom mmcv.cnn.bricks.transformer import build_positional_encoding\r\nfrom mmcv.runner import force_fp32, auto_fp16\r\nimport numpy as np\r\nimport mmcv\r\nimport cv2 as cv\r\nfrom mmcv.cnn.bricks.transformer import FFN, build_positional_encoding\r\nfrom mmdet.models.utils import build_transformer\r\n\r\n\r\n\r\n@HEADS.register_module()\r\nclass BackwardProjection(BaseModule):\r\n    \"\"\"Head of Detr3D.\r\n    Args:\r\n        with_box_refine (bool): Whether to refine the reference points\r\n            in the decoder. Defaults to False.\r\n        as_two_stage (bool) : Whether to generate the proposal from\r\n            the outputs of encoder.\r\n        transformer (obj:`ConfigDict`): ConfigDict is used for building\r\n            the Encoder and Decoder.\r\n        bev_h, bev_w (int): spatial shape of BEV queries.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 *args,\r\n                 transformer=None,\r\n                 positional_encoding=None,\r\n                 pc_range=None,\r\n                 in_channels=64,\r\n                 out_channels=64,\r\n                 use_zero_embedding=False,\r\n                 bev_h=30,\r\n                 bev_w=30,\r\n                 \r\n                 **kwargs):\r\n        super().__init__()\r\n        self.bev_h = bev_h\r\n        self.bev_w = bev_w\r\n        self.fp16_enabled = False\r\n        self.pc_range = pc_range\r\n        self.use_zero_embedding = use_zero_embedding\r\n        self.real_w = self.pc_range[3] - self.pc_range[0]\r\n        self.real_h = self.pc_range[4] - self.pc_range[1]\r\n       \r\n        self.positional_encoding = build_positional_encoding(\r\n            positional_encoding)\r\n        self.transformer = build_transformer(transformer)\r\n        self.embed_dims = self.transformer.embed_dims\r\n\r\n\r\n        self._init_layers()\r\n\r\n    def _init_layers(self):\r\n        self.bev_embedding = nn.Embedding(\r\n                self.bev_h * self.bev_w, self.embed_dims)\r\n\r\n    def init_weights(self):\r\n        \"\"\"Initialize weights of the DeformDETR head.\"\"\"\r\n        self.transformer.init_weights()\r\n\r\n    @auto_fp16(apply_to=('mlvl_feats'))\r\n    def forward(self, mlvl_feats, img_metas, lss_bev=None, gt_bboxes_3d=None, cam_params=None, pred_img_depth=None, bev_mask=None):\r\n        \"\"\"Forward function.\r\n        Args:\r\n            mlvl_feats (tuple[Tensor]): Features from the upstream\r\n                network, each is a 5D-tensor with shape\r\n                (B, N, C, H, W).\r\n        Returns:\r\n            all_cls_scores (Tensor): Outputs 
from the classification head, \\\r\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note \\\r\n                cls_out_channels should includes background.\r\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\r\n                head with normalized coordinate format (cx, cy, w, l, cz, h, theta, vx, vy). \\\r\n                Shape [nb_dec, bs, num_query, 9].\r\n        \"\"\"\r\n\r\n        bs, num_cam, _, _, _ = mlvl_feats[0].shape\r\n        dtype = mlvl_feats[0].dtype\r\n        bev_queries = self.bev_embedding.weight.to(dtype)\r\n        bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1)\r\n        \r\n        if lss_bev is not None:\r\n            lss_bev = lss_bev.flatten(2).permute(2, 0, 1)\r\n            bev_queries = bev_queries + lss_bev\r\n        \r\n        if bev_mask is not None:\r\n            bev_mask = bev_mask.reshape(bs, -1)\r\n\r\n        bev_pos = self.positional_encoding(bs, self.bev_h, self.bev_w, bev_queries.device).to(dtype)\r\n\r\n        bev =  self.transformer(\r\n                mlvl_feats,\r\n                bev_queries,\r\n                self.bev_h,\r\n                self.bev_w,\r\n                grid_length=(self.real_h / self.bev_h,\r\n                             self.real_w / self.bev_w),\r\n                bev_pos=bev_pos,\r\n                img_metas=img_metas,\r\n                cam_params=cam_params,\r\n                gt_bboxes_3d=gt_bboxes_3d,\r\n                pred_img_depth=pred_img_depth,\r\n                prev_bev=None,\r\n                bev_mask=bev_mask,\r\n            )\r\n\r\n        bev = bev.permute(0, 2, 1).view(bs, -1, self.bev_h, self.bev_w).contiguous()\r\n\r\n\r\n        return bev\r\n\r\n"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/__init__.py",
    "content": "from .bevformer import BEVFormer\nfrom .bevformer_encoder import bevformer_encoder, BEVFormerEncoderLayer\nfrom .spatial_cross_attention_depth import DA_MSDeformableAttention, DA_SpatialCrossAttention\nfrom .positional_encoding import CustormLearnedPositionalEncoding"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/bevformer.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import xavier_init\nfrom mmcv.cnn.bricks.transformer import build_transformer_layer_sequence\nfrom mmcv.runner.base_module import BaseModule\nfrom mmdet.models.utils.builder import TRANSFORMER\nfrom torch.nn.init import normal_\nfrom mmcv.runner.base_module import BaseModule\nfrom torchvision.transforms.functional import rotate\nfrom .spatial_cross_attention_depth import DA_MSDeformableAttention\nfrom mmcv.runner import force_fp32, auto_fp16\nfrom mmdet.models import  build_neck\n\n\n@TRANSFORMER.register_module()\nclass BEVFormer(BaseModule):\n    \"\"\"Implements the Detr3D transformer.\n    Args:\n        as_two_stage (bool): Generate query from encoder features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 num_cams=6,\n                 encoder=None,\n                 embed_dims=256,\n                 output_dims=256,     \n                 use_cams_embeds=True,\n                 **kwargs):\n        super(BEVFormer, self).__init__(**kwargs)\n        self.encoder = build_transformer_layer_sequence(encoder)\n        self.embed_dims = embed_dims\n        self.num_cams = num_cams\n        self.fp16_enabled = False\n        self.output_dims = output_dims\n    \n        self.use_cams_embeds = use_cams_embeds\n\n        self.init_layers()       \n\n    def init_layers(self):\n        \"\"\"Initialize layers of the Detr3DTransformer.\"\"\"\n        self.cams_embeds = nn.Parameter(\n            torch.Tensor(self.num_cams, self.embed_dims))\n      \n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if isinstance(m, DA_MSDeformableAttention):\n                try:\n                    m.init_weight()\n                except AttributeError:\n                    m.init_weights()\n        normal_(self.cams_embeds)\n\n    @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'prev_bev', 'bev_pos'))\n    def forward(\n            self,\n            mlvl_feats,\n            bev_queries,\n            bev_h,\n            bev_w,\n            # grid_length=[0.512, 0.512],\n            bev_pos=None,\n            cam_params=None,\n            gt_bboxes_3d=None,\n            pred_img_depth=None,\n            prev_bev=None,\n            bev_mask=None,\n            **kwargs):\n        \"\"\"\n        obtain bev features.\n        \"\"\"\n\n        bs = mlvl_feats[0].size(0)\n\n        bev_pos = bev_pos.flatten(2).permute(2, 0, 1)\n\n        feat_flatten = []\n        spatial_shapes = []\n        for lvl, feat in enumerate(mlvl_feats):\n            bs, num_cam, c, h, w = feat.shape\n            spatial_shape = (h, w)\n            feat = feat.flatten(3).permute(1, 0, 3, 2)\n            if self.use_cams_embeds:\n                feat = feat + self.cams_embeds[:, None, None, :].to(feat.dtype)\n            else:\n                feat = feat + 
self.cams_embeds[:, None, None, :].to(feat.dtype) * 0\n            spatial_shapes.append(spatial_shape)\n            feat_flatten.append(feat)\n\n        feat_flatten = torch.cat(feat_flatten, 2)\n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=bev_pos.device)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))\n\n        feat_flatten = feat_flatten.permute(0, 2, 1, 3)  # (num_cam, H*W, bs, embed_dims)\n\n        bev_embed = self.encoder(\n            bev_queries,\n            feat_flatten,\n            feat_flatten,\n            bev_h=bev_h,\n            bev_w=bev_w,\n            bev_pos=bev_pos,\n            spatial_shapes=spatial_shapes,\n            level_start_index=level_start_index,\n            cam_params=cam_params,\n            gt_bboxes_3d=gt_bboxes_3d,\n            pred_img_depth=pred_img_depth,\n            prev_bev=prev_bev,\n            bev_mask=bev_mask,\n            **kwargs\n        )\n\n        return bev_embed\n\n"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/bevformer_encoder.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \r\n# \r\n# This work is made available under the Nvidia Source Code License-NC. \r\n# To view a copy of this license, visit \r\n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\r\n\r\nfrom .custom_base_transformer_layer import MyCustomBaseTransformerLayer\r\nimport copy\r\nimport warnings\r\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\r\n                                      TRANSFORMER_LAYER,\r\n                                      TRANSFORMER_LAYER_SEQUENCE)\r\nfrom mmcv.cnn.bricks.transformer import TransformerLayerSequence\r\nfrom mmcv.runner import force_fp32, auto_fp16\r\nimport numpy as np\r\nimport torch\r\nimport cv2 as cv\r\nimport mmcv\r\nimport time\r\nfrom mmcv.utils import TORCH_VERSION, digit_version\r\nfrom mmcv.utils import ext_loader\r\n\r\next_module = ext_loader.load_ext(\r\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\r\n\r\n\r\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\r\nclass bevformer_encoder(TransformerLayerSequence):\r\n\r\n    \"\"\"\r\n    Attention with both self and cross\r\n    Implements the decoder in DETR transformer.\r\n    Args:\r\n        return_intermediate (bool): Whether to return intermediate outputs.\r\n        coder_norm_cfg (dict): Config of last normalization layer. Default：\r\n            `LN`.\r\n    \"\"\"\r\n\r\n    def __init__(self, *args, pc_range=None, grid_config=None, data_config=None, return_intermediate=False, dataset_type='nuscenes', fix_bug=False,\r\n                 **kwargs):\r\n\r\n        super(bevformer_encoder, self).__init__(*args, **kwargs)\r\n        self.return_intermediate = return_intermediate\r\n        self.fix_bug = fix_bug\r\n        self.x_bound = grid_config['x']\r\n        self.y_bound = grid_config['y']\r\n        self.z_bound = grid_config['z']\r\n        self.final_dim = data_config['input_size']\r\n        self.pc_range = pc_range\r\n        self.fp16_enabled = False\r\n\r\n    def get_reference_points(self,H, W, Z=8, dim='3d', bs=1, device='cuda', dtype=torch.float):\r\n        \"\"\"Get the reference points used in SCA and TSA.\r\n        Args:\r\n            H, W: spatial shape of bev.\r\n            Z: hight of pillar.\r\n            D: sample D points uniformly from each pillar.\r\n            device (obj:`device`): The device where\r\n                reference_points should be.\r\n        Returns:\r\n            Tensor: reference points used in decoder, has \\\r\n                shape (bs, num_keys, num_levels, 2).\r\n        \"\"\"\r\n\r\n        # reference points in 3D space, used in spatial cross-attention (SCA)\r\n        if dim == '3d':\r\n\r\n            X = torch.arange(*self.x_bound, dtype=torch.float) + self.x_bound[-1]/2\r\n            Y = torch.arange(*self.y_bound, dtype=torch.float) + self.y_bound[-1]/2\r\n            Z = torch.arange(*self.z_bound, dtype=torch.float) + self.z_bound[-1]/2\r\n            Y, X, Z = torch.meshgrid([Y, X, Z])\r\n            coords = torch.stack([X, Y, Z], dim=-1)\r\n            coords = coords.to(dtype).to(device)\r\n            # frustum = torch.cat([coords, torch.ones_like(coords[...,0:1])], dim=-1) #(x, y, z, 4)\r\n            return coords\r\n\r\n        # reference points on 2D bev plane, used in temporal self-attention (TSA).\r\n        elif dim == '2d':\r\n            ref_y, ref_x = torch.meshgrid(\r\n                torch.linspace(\r\n                    0.5, H - 0.5, H, dtype=dtype, device=device),\r\n                
torch.linspace(\r\n                    0.5, W - 0.5, W, dtype=dtype, device=device)\r\n            )\r\n            ref_y = ref_y.reshape(-1)[None] / H\r\n            ref_x = ref_x.reshape(-1)[None] / W\r\n            ref_2d = torch.stack((ref_x, ref_y), -1)\r\n            ref_2d = ref_2d.repeat(bs, 1, 1).unsqueeze(2)\r\n            return ref_2d\r\n    \r\n    @force_fp32(apply_to=('reference_points', 'cam_params'))\r\n    def point_sampling(self, reference_points, pc_range,  img_metas, cam_params=None, gt_bboxes_3d=None):\r\n\r\n        rots, trans, intrins, post_rots, post_trans, bda = cam_params\r\n        B, N, _ = trans.shape\r\n        eps = 1e-5\r\n        ogfH, ogfW = self.final_dim\r\n        reference_points = reference_points[None, None].repeat(B, N, 1, 1, 1, 1)\r\n        reference_points = torch.inverse(bda).view(B, 1, 1, 1, 1, 3,\r\n                          3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\r\n        reference_points -= trans.view(B, N, 1, 1, 1, 3)\r\n        combine = rots.matmul(torch.inverse(intrins)).inverse()\r\n        reference_points_cam = combine.view(B, N, 1, 1, 1, 3, 3).matmul(reference_points.unsqueeze(-1)).squeeze(-1)\r\n        reference_points_cam = torch.cat([reference_points_cam[..., 0:2] / torch.maximum(\r\n            reference_points_cam[..., 2:3], torch.ones_like(reference_points_cam[..., 2:3])*eps),  reference_points_cam[..., 2:3]], 5\r\n            )\r\n        reference_points_cam = post_rots.view(B, N, 1, 1, 1, 3, 3).matmul(reference_points_cam.unsqueeze(-1)).squeeze(-1)\r\n        reference_points_cam += post_trans.view(B, N, 1, 1, 1, 3) \r\n        reference_points_cam[..., 0] /= ogfW\r\n        reference_points_cam[..., 1] /= ogfH\r\n        mask = (reference_points_cam[..., 2:3] > eps)\r\n        mask = (mask & (reference_points_cam[..., 0:1] > eps) \r\n                 & (reference_points_cam[..., 0:1] < (1.0-eps)) \r\n                 & (reference_points_cam[..., 1:2] > eps) \r\n                 & (reference_points_cam[..., 1:2] < (1.0-eps)))\r\n        B, N, H, W, D, _ = reference_points_cam.shape\r\n        reference_points_cam = reference_points_cam.permute(1, 0, 2, 3, 4, 5).reshape(N, B, H*W, D, 3)\r\n        mask = mask.permute(1, 0, 2, 3, 4, 5).reshape(N, B, H*W, D, 1).squeeze(-1)\r\n\r\n        return reference_points, reference_points_cam[..., :2], mask, reference_points_cam[..., 2:3]\r\n\r\n\r\n    @auto_fp16()\r\n    def forward(self,\r\n                bev_query,\r\n                key,\r\n                value,\r\n                *args,\r\n                bev_h=None,\r\n                bev_w=None,\r\n                bev_pos=None,\r\n                spatial_shapes=None,\r\n                level_start_index=None,\r\n                valid_ratios=None,\r\n                cam_params=None,\r\n                gt_bboxes_3d=None,\r\n                pred_img_depth=None,\r\n                bev_mask=None,\r\n                prev_bev=None,\r\n                **kwargs):\r\n        \"\"\"Forward function for `TransformerDecoder`.\r\n        Args:\r\n            bev_query (Tensor): Input BEV query with shape\r\n                `(num_query, bs, embed_dims)`.\r\n            key & value (Tensor): Input multi-cameta features with shape\r\n                (num_cam, num_value, bs, embed_dims)\r\n            reference_points (Tensor): The reference\r\n                points of offset. 
has shape\r\n                (bs, num_query, 4) when as_two_stage,\r\n                otherwise has shape ((bs, num_query, 2).\r\n            valid_ratios (Tensor): The radios of valid\r\n                points on the feature map, has shape\r\n                (bs, num_levels, 2)\r\n        Returns:\r\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\r\n                return_intermediate is `False`, otherwise it has shape\r\n                [num_layers, num_query, bs, embed_dims].\r\n        \"\"\"\r\n\r\n        output = bev_query\r\n        intermediate = []\r\n\r\n        ref_3d = self.get_reference_points(\r\n            bev_h, bev_w, self.pc_range[5]-self.pc_range[2], dim='3d', bs=bev_query.size(1),  device=bev_query.device, dtype=bev_query.dtype)\r\n        ref_2d = self.get_reference_points(\r\n            bev_h, bev_w, dim='2d', bs=bev_query.size(1), device=bev_query.device, dtype=bev_query.dtype)\r\n\r\n        ref_3d, reference_points_cam, per_cam_mask_list, bev_query_depth = self.point_sampling(\r\n            ref_3d, self.pc_range, kwargs['img_metas'], cam_params=cam_params, gt_bboxes_3d=gt_bboxes_3d)\r\n\r\n        bev_query = bev_query.permute(1, 0, 2)\r\n        bev_pos = bev_pos.permute(1, 0, 2)\r\n        bs, len_bev, num_bev_level, _ = ref_2d.shape\r\n        for lid, layer in enumerate(self.layers):\r\n           \r\n            output = layer(\r\n                bev_query,\r\n                key,\r\n                value,\r\n                *args,\r\n                bev_pos=bev_pos,\r\n                ref_2d=ref_2d,\r\n                ref_3d=ref_3d,\r\n                bev_h=bev_h,\r\n                bev_w=bev_w,\r\n                prev_bev=prev_bev,\r\n                spatial_shapes=spatial_shapes,\r\n                level_start_index=level_start_index,\r\n                reference_points_cam=reference_points_cam,\r\n                per_cam_mask_list=per_cam_mask_list,\r\n                bev_mask=bev_mask,\r\n                bev_query_depth=bev_query_depth,\r\n                pred_img_depth=pred_img_depth,\r\n                **kwargs)\r\n\r\n            bev_query = output\r\n            if self.return_intermediate:\r\n                intermediate.append(output)\r\n\r\n        if self.return_intermediate:\r\n            return torch.stack(intermediate)\r\n\r\n        return output\r\n\r\n\r\n@TRANSFORMER_LAYER.register_module()\r\nclass BEVFormerEncoderLayer(MyCustomBaseTransformerLayer):\r\n    \"\"\"Implements decoder layer in DETR transformer.\r\n    Args:\r\n        attn_cfgs (list[`mmcv.ConfigDict`] | list[dict] | dict )):\r\n            Configs for self_attention or cross_attention, the order\r\n            should be consistent with it in `operation_order`. If it is\r\n            a dict, it would be expand to the number of attention in\r\n            `operation_order`.\r\n        feedforward_channels (int): The hidden dimension for FFNs.\r\n        ffn_dropout (float): Probability of an element to be zeroed\r\n            in ffn. Default 0.0.\r\n        operation_order (tuple[str]): The execution order of operation\r\n            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').\r\n            Default：None\r\n        act_cfg (dict): The activation config for FFNs. 
Default: `LN`\r\n        norm_cfg (dict): Config dict for normalization layer.\r\n            Default: `LN`.\r\n        ffn_num_fcs (int): The number of fully-connected layers in FFNs.\r\n            Default：2.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 attn_cfgs,\r\n                 feedforward_channels=512,\r\n                 ffn_dropout=0.0,\r\n                 operation_order=None,\r\n                 act_cfg=dict(type='ReLU', inplace=True),\r\n                 norm_cfg=dict(type='LN'),\r\n                 ffn_num_fcs=2,\r\n                 **kwargs):\r\n        super(BEVFormerEncoderLayer, self).__init__(\r\n            attn_cfgs=attn_cfgs,\r\n            feedforward_channels=feedforward_channels,\r\n            ffn_dropout=ffn_dropout,\r\n            operation_order=operation_order,\r\n            act_cfg=act_cfg,\r\n            norm_cfg=norm_cfg,\r\n            ffn_num_fcs=ffn_num_fcs,\r\n            **kwargs)\r\n        self.fp16_enabled = False\r\n        assert len(operation_order) in {2, 4, 6}\r\n        # assert set(operation_order) in set(['self_attn', 'norm', 'cross_attn', 'ffn'])\r\n\r\n    @force_fp32()\r\n    def forward(self,\r\n                query,\r\n                key=None,\r\n                value=None,\r\n                bev_pos=None,\r\n                query_pos=None,\r\n                key_pos=None,\r\n                attn_masks=None,\r\n                query_key_padding_mask=None,\r\n                key_padding_mask=None,\r\n                ref_2d=None,\r\n                ref_3d=None,\r\n                bev_h=None,\r\n                bev_w=None,\r\n                reference_points_cam=None,\r\n                mask=None,\r\n                spatial_shapes=None,\r\n                level_start_index=None,\r\n                prev_bev=None,\r\n                debug=False,\r\n                bev_mask=None,\r\n                bev_query_depth=None,\r\n                per_cam_mask_list=None,\r\n                lidar_bev=None,\r\n                pred_img_depth=None, \r\n                **kwargs):\r\n        \"\"\"Forward function for `TransformerDecoderLayer`.\r\n\r\n        **kwargs contains some specific arguments of attentions.\r\n\r\n        Args:\r\n            query (Tensor): The input query with shape\r\n                [num_queries, bs, embed_dims] if\r\n                self.batch_first is False, else\r\n                [bs, num_queries embed_dims].\r\n            key (Tensor): The key tensor with shape [num_keys, bs,\r\n                embed_dims] if self.batch_first is False, else\r\n                [bs, num_keys, embed_dims] .\r\n            value (Tensor): The value tensor with same shape as `key`.\r\n            query_pos (Tensor): The positional encoding for `query`.\r\n                Default: None.\r\n            key_pos (Tensor): The positional encoding for `key`.\r\n                Default: None.\r\n            attn_masks (List[Tensor] | None): 2D Tensor used in\r\n                calculation of corresponding attention. The length of\r\n                it should equal to the number of `attention` in\r\n                `operation_order`. Default: None.\r\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_queries]. Only used in `self_attn` layer.\r\n                Defaults to None.\r\n            key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_keys]. 
Default: None.\r\n\r\n        Returns:\r\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\r\n        \"\"\"\r\n\r\n        norm_index = 0\r\n        attn_index = 0\r\n        ffn_index = 0\r\n        identity = query\r\n        if attn_masks is None:\r\n            attn_masks = [None for _ in range(self.num_attn)]\r\n        elif isinstance(attn_masks, torch.Tensor):\r\n            attn_masks = [\r\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\r\n            ]\r\n            warnings.warn(f'Use same attn_mask in all attentions in '\r\n                          f'{self.__class__.__name__} ')\r\n        else:\r\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\r\n                                                     f'attn_masks {len(attn_masks)} must be equal ' \\\r\n                                                     f'to the number of attention in ' \\\r\n                f'operation_order {self.num_attn}'\r\n        for layer in self.operation_order:\r\n            # temporal self attention\r\n            if layer == 'self_attn':\r\n                query = self.attentions[attn_index](\r\n                    query,\r\n                    None,\r\n                    None,\r\n                    identity if self.pre_norm else None,\r\n                    query_pos=bev_pos,\r\n                    key_pos=bev_pos,\r\n                    attn_mask=attn_masks[attn_index],\r\n                    key_padding_mask=bev_mask,\r\n                    reference_points=ref_2d,\r\n                    spatial_shapes=torch.tensor(\r\n                        [[bev_h, bev_w]], device=query.device),\r\n                    level_start_index=torch.tensor([0], device=query.device),\r\n                    **kwargs)\r\n                attn_index += 1\r\n                identity = query\r\n\r\n            elif layer == 'norm':\r\n                query = self.norms[norm_index](query)\r\n                norm_index += 1\r\n\r\n            # spaital cross attention\r\n            elif layer == 'cross_attn':\r\n                query = self.attentions[attn_index](\r\n                    query,\r\n                    key,\r\n                    value,\r\n                    identity if self.pre_norm else None,\r\n                    query_pos=bev_pos,\r\n                    key_pos=key_pos,\r\n                    reference_points=ref_3d,\r\n                    reference_points_cam=reference_points_cam,\r\n                    attn_mask=attn_masks[attn_index],\r\n                    key_padding_mask=key_padding_mask,\r\n                    spatial_shapes=spatial_shapes,\r\n                    level_start_index=level_start_index,\r\n                    bev_query_depth=bev_query_depth,\r\n                    pred_img_depth=pred_img_depth,\r\n                    bev_mask=bev_mask,\r\n                    per_cam_mask_list=per_cam_mask_list,\r\n                    **kwargs)\r\n                attn_index += 1\r\n                identity = query\r\n\r\n            elif layer == 'ffn':\r\n                query = self.ffns[ffn_index](\r\n                    query, identity if self.pre_norm else None)\r\n                ffn_index += 1\r\n\r\n        return query\r\n\r\n"
  },
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/custom_base_transformer_layer.py",
    "content": "# ---------------------------------------------\r\n# Copyright (c) OpenMMLab. All rights reserved.\r\n# ---------------------------------------------\r\n#  Modified by Zhiqi Li\r\n# ---------------------------------------------\r\n\r\nimport copy\r\nimport warnings\r\n\r\nimport torch\r\nimport torch.nn as nn\r\n\r\nfrom mmcv import ConfigDict, deprecated_api_warning\r\nfrom mmcv.cnn import Linear, build_activation_layer, build_norm_layer\r\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\r\n\r\nfrom mmcv.cnn.bricks.registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,\r\n                                      TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)\r\n\r\n# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file\r\ntry:\r\n    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention  # noqa F401\r\n    warnings.warn(\r\n        ImportWarning(\r\n            '``MultiScaleDeformableAttention`` has been moved to '\r\n            '``mmcv.ops.multi_scale_deform_attn``, please change original path '  # noqa E501\r\n            '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` '  # noqa E501\r\n            'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` '  # noqa E501\r\n        ))\r\nexcept ImportError:\r\n    warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from '\r\n                  '``mmcv.ops.multi_scale_deform_attn``, '\r\n                  'You should install ``mmcv-full`` if you need this module. ')\r\nfrom mmcv.cnn.bricks.transformer import build_feedforward_network, build_attention\r\n\r\n\r\n# @TRANSFORMER_LAYER.register_module()\r\nclass MyCustomBaseTransformerLayer(BaseModule):\r\n    \"\"\"Base `TransformerLayer` for vision transformer.\r\n    It can be built from `mmcv.ConfigDict` and support more flexible\r\n    customization, for example, using any number of `FFN or LN ` and\r\n    use different kinds of `attention` by specifying a list of `ConfigDict`\r\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\r\n    when you specifying `norm` as the first element of `operation_order`.\r\n    More details about the `prenorm`: `On Layer Normalization in the\r\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\r\n    Args:\r\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\r\n            Configs for `self_attention` or `cross_attention` modules,\r\n            The order of the configs in the list should be consistent with\r\n            corresponding attentions in operation_order.\r\n            If it is a dict, all of the attention modules in operation_order\r\n            will be built with this config. Default: None.\r\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\r\n            Configs for FFN, The order of the configs in the list should be\r\n            consistent with corresponding ffn in operation_order.\r\n            If it is a dict, all of the attention modules in operation_order\r\n            will be built with this config.\r\n        operation_order (tuple[str]): The execution order of operation\r\n            in transformer. 
Such as ('self_attn', 'norm', 'ffn', 'norm').\r\n            Support `prenorm` when you specifying first element as `norm`.\r\n            Default：None.\r\n        norm_cfg (dict): Config dict for normalization layer.\r\n            Default: dict(type='LN').\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n        batch_first (bool): Key, Query and Value are shape\r\n            of (batch, n, embed_dim)\r\n            or (n, batch, embed_dim). Default to False.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 attn_cfgs=None,\r\n                 ffn_cfgs=dict(\r\n                     type='FFN',\r\n                     embed_dims=256,\r\n                     feedforward_channels=1024,\r\n                     num_fcs=2,\r\n                     ffn_drop=0.,\r\n                     act_cfg=dict(type='ReLU', inplace=True),\r\n                 ),\r\n                 operation_order=None,\r\n                 norm_cfg=dict(type='LN'),\r\n                 init_cfg=None,\r\n                 batch_first=True,\r\n                 **kwargs):\r\n\r\n        deprecated_args = dict(\r\n            feedforward_channels='feedforward_channels',\r\n            ffn_dropout='ffn_drop',\r\n            ffn_num_fcs='num_fcs')\r\n        for ori_name, new_name in deprecated_args.items():\r\n            if ori_name in kwargs:\r\n                warnings.warn(\r\n                    f'The arguments `{ori_name}` in BaseTransformerLayer '\r\n                    f'has been deprecated, now you should set `{new_name}` '\r\n                    f'and other FFN related arguments '\r\n                    f'to a dict named `ffn_cfgs`. ')\r\n                if ffn_cfgs:\r\n                    ffn_cfgs[new_name] = kwargs[ori_name]\r\n\r\n        super(MyCustomBaseTransformerLayer, self).__init__(init_cfg)\r\n\r\n        self.batch_first = batch_first\r\n\r\n        # assert set(operation_order) & set(\r\n        #     ['self_attn', 'norm', 'ffn', 'cross_attn']) == \\\r\n        #     set(operation_order), f'The operation_order of' \\\r\n        #     f' {self.__class__.__name__} should ' \\\r\n        #     f'contains all four operation type ' \\\r\n        #     f\"{['self_attn', 'norm', 'ffn', 'cross_attn']}\"\r\n\r\n        num_attn = operation_order.count('self_attn') + operation_order.count(\r\n            'cross_attn')\r\n        if isinstance(attn_cfgs, dict):\r\n            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]\r\n        else:\r\n            assert num_attn == len(attn_cfgs), f'The length ' \\\r\n                f'of attn_cfg {num_attn} is ' \\\r\n                f'not consistent with the number of attention' \\\r\n                f'in operation_order {operation_order}.'\r\n\r\n        self.num_attn = num_attn\r\n        self.operation_order = operation_order\r\n        self.norm_cfg = norm_cfg\r\n        self.pre_norm = operation_order[0] == 'norm'\r\n        self.attentions = ModuleList()\r\n\r\n        index = 0\r\n        for operation_name in operation_order:\r\n            if operation_name in ['self_attn', 'cross_attn']:\r\n                if 'batch_first' in attn_cfgs[index]:\r\n                    assert self.batch_first == attn_cfgs[index]['batch_first']\r\n                else:\r\n                    attn_cfgs[index]['batch_first'] = self.batch_first\r\n                attention = build_attention(attn_cfgs[index])\r\n                # Some custom attentions used as `self_attn`\r\n                # or 
`cross_attn` can have different behavior.\r\n                attention.operation_name = operation_name\r\n                self.attentions.append(attention)\r\n                index += 1\r\n\r\n        self.embed_dims = self.attentions[0].embed_dims\r\n\r\n        if ffn_cfgs:\r\n            self.ffns = ModuleList()\r\n            num_ffns = operation_order.count('ffn')\r\n            if isinstance(ffn_cfgs, dict):\r\n                ffn_cfgs = ConfigDict(ffn_cfgs)\r\n            if isinstance(ffn_cfgs, dict):\r\n                ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]\r\n            assert len(ffn_cfgs) == num_ffns\r\n            for ffn_index in range(num_ffns):\r\n                if 'embed_dims' not in ffn_cfgs[ffn_index]:\r\n                    ffn_cfgs['embed_dims'] = self.embed_dims\r\n                else:\r\n                    assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims\r\n\r\n                self.ffns.append(\r\n                    build_feedforward_network(ffn_cfgs[ffn_index]))\r\n\r\n        self.norms = ModuleList()\r\n        num_norms = operation_order.count('norm')\r\n        for _ in range(num_norms):\r\n            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])\r\n\r\n    def forward(self,\r\n                query,\r\n                key=None,\r\n                value=None,\r\n                query_pos=None,\r\n                key_pos=None,\r\n                attn_masks=None,\r\n                query_key_padding_mask=None,\r\n                key_padding_mask=None,\r\n                **kwargs):\r\n        \"\"\"Forward function for `TransformerDecoderLayer`.\r\n        **kwargs contains some specific arguments of attentions.\r\n        Args:\r\n            query (Tensor): The input query with shape\r\n                [num_queries, bs, embed_dims] if\r\n                self.batch_first is False, else\r\n                [bs, num_queries embed_dims].\r\n            key (Tensor): The key tensor with shape [num_keys, bs,\r\n                embed_dims] if self.batch_first is False, else\r\n                [bs, num_keys, embed_dims] .\r\n            value (Tensor): The value tensor with same shape as `key`.\r\n            query_pos (Tensor): The positional encoding for `query`.\r\n                Default: None.\r\n            key_pos (Tensor): The positional encoding for `key`.\r\n                Default: None.\r\n            attn_masks (List[Tensor] | None): 2D Tensor used in\r\n                calculation of corresponding attention. The length of\r\n                it should equal to the number of `attention` in\r\n                `operation_order`. Default: None.\r\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_queries]. Only used in `self_attn` layer.\r\n                Defaults to None.\r\n            key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_keys]. 
Default: None.\r\n        Returns:\r\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\r\n        \"\"\"\r\n\r\n        norm_index = 0\r\n        attn_index = 0\r\n        ffn_index = 0\r\n        identity = query\r\n        if attn_masks is None:\r\n            attn_masks = [None for _ in range(self.num_attn)]\r\n        elif isinstance(attn_masks, torch.Tensor):\r\n            attn_masks = [\r\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\r\n            ]\r\n            warnings.warn(f'Use same attn_mask in all attentions in '\r\n                          f'{self.__class__.__name__} ')\r\n        else:\r\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\r\n                f'attn_masks {len(attn_masks)} must be equal ' \\\r\n                f'to the number of attention in ' \\\r\n                f'operation_order {self.num_attn}'\r\n\r\n        for layer in self.operation_order:\r\n            if layer == 'self_attn':\r\n                temp_key = temp_value = query\r\n                query = self.attentions[attn_index](\r\n                    query,\r\n                    temp_key,\r\n                    temp_value,\r\n                    identity if self.pre_norm else None,\r\n                    query_pos=query_pos,\r\n                    key_pos=query_pos,\r\n                    attn_mask=attn_masks[attn_index],\r\n                    key_padding_mask=query_key_padding_mask,\r\n                    **kwargs)\r\n                attn_index += 1\r\n                identity = query\r\n\r\n            elif layer == 'norm':\r\n                query = self.norms[norm_index](query)\r\n                norm_index += 1\r\n\r\n            elif layer == 'cross_attn':\r\n                query = self.attentions[attn_index](\r\n                    query,\r\n                    key,\r\n                    value,\r\n                    identity if self.pre_norm else None,\r\n                    query_pos=query_pos,\r\n                    key_pos=key_pos,\r\n                    attn_mask=attn_masks[attn_index],\r\n                    key_padding_mask=key_padding_mask,\r\n                    **kwargs)\r\n                attn_index += 1\r\n                identity = query\r\n\r\n            elif layer == 'ffn':\r\n                query = self.ffns[ffn_index](\r\n                    query, identity if self.pre_norm else None)\r\n                ffn_index += 1\r\n\r\n        return query\r\n"
  },
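The layer above dispatches entirely on the `operation_order` tuple, so the easiest way to see what an order such as ('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm') does is to walk it by hand. Below is a minimal, self-contained sketch in plain PyTorch; it deliberately avoids the mmcv registries and builders, so `TinyLayer` and its sub-modules are illustrative stand-ins rather than the classes registered above, but it mirrors how the residual identity is carried in the pre-norm vs. post-norm cases.

```python
# Minimal sketch (plain PyTorch, not the mmcv-registered classes above) of how an
# `operation_order` tuple is walked by a transformer layer with counted sub-modules.
import torch
import torch.nn as nn


class TinyLayer(nn.Module):
    def __init__(self, dim=256,
                 order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')):
        super().__init__()
        self.order = order
        num_attn = order.count('self_attn') + order.count('cross_attn')
        self.attns = nn.ModuleList(
            [nn.MultiheadAttention(dim, 8, batch_first=True) for _ in range(num_attn)])
        self.norms = nn.ModuleList(
            [nn.LayerNorm(dim) for _ in range(order.count('norm'))])
        self.ffns = nn.ModuleList(
            [nn.Sequential(nn.Linear(dim, 4 * dim), nn.ReLU(), nn.Linear(4 * dim, dim))
             for _ in range(order.count('ffn'))])

    def forward(self, query, key):
        a = n = f = 0
        identity = query
        pre_norm = self.order[0] == 'norm'  # same rule as self.pre_norm above
        for op in self.order:
            if op == 'self_attn':
                res = identity if pre_norm else query
                query = res + self.attns[a](query, query, query)[0]
                a += 1
                identity = query
            elif op == 'cross_attn':
                res = identity if pre_norm else query
                query = res + self.attns[a](query, key, key)[0]
                a += 1
                identity = query
            elif op == 'norm':
                query = self.norms[n](query)
                n += 1
            elif op == 'ffn':
                res = identity if pre_norm else query
                query = res + self.ffns[f](query)
                f += 1
        return query


q, k = torch.randn(2, 100, 256), torch.randn(2, 1000, 256)
print(TinyLayer()(q, k).shape)  # torch.Size([2, 100, 256])
```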
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/multi_scale_deformable_attn_function.py",
    "content": "# ---------------------------------------------\r\n# Copyright (c) OpenMMLab. All rights reserved.\r\n# ---------------------------------------------\r\n#  Modified by Zhiqi Li\r\n# ---------------------------------------------\r\n\r\nimport torch\r\nfrom torch.cuda.amp import custom_bwd, custom_fwd\r\nfrom torch.autograd.function import Function, once_differentiable\r\nfrom mmcv.utils import ext_loader\r\next_module = ext_loader.load_ext(\r\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\r\n\r\n\r\nclass MultiScaleDeformableAttnFunction_fp16(Function):\r\n\r\n    @staticmethod\r\n    @custom_fwd(cast_inputs=torch.float16)\r\n    def forward(ctx, value, value_spatial_shapes, value_level_start_index,\r\n                sampling_locations, attention_weights, im2col_step):\r\n        \"\"\"GPU version of multi-scale deformable attention.\r\n\r\n        Args:\r\n            value (Tensor): The value has shape\r\n                (bs, num_keys, mum_heads, embed_dims//num_heads)\r\n            value_spatial_shapes (Tensor): Spatial shape of\r\n                each feature map, has shape (num_levels, 2),\r\n                last dimension 2 represent (h, w)\r\n            sampling_locations (Tensor): The location of sampling points,\r\n                has shape\r\n                (bs ,num_queries, num_heads, num_levels, num_points, 2),\r\n                the last dimension 2 represent (x, y).\r\n            attention_weights (Tensor): The weight of sampling points used\r\n                when calculate the attention, has shape\r\n                (bs ,num_queries, num_heads, num_levels, num_points),\r\n            im2col_step (Tensor): The step used in image to column.\r\n\r\n        Returns:\r\n            Tensor: has shape (bs, num_queries, embed_dims)\r\n        \"\"\"\r\n        ctx.im2col_step = im2col_step\r\n        output = ext_module.ms_deform_attn_forward(\r\n            value,\r\n            value_spatial_shapes,\r\n            value_level_start_index,\r\n            sampling_locations,\r\n            attention_weights,\r\n            im2col_step=ctx.im2col_step)\r\n        ctx.save_for_backward(value, value_spatial_shapes,\r\n                              value_level_start_index, sampling_locations,\r\n                              attention_weights)\r\n        return output\r\n\r\n    @staticmethod\r\n    @once_differentiable\r\n    @custom_bwd\r\n    def backward(ctx, grad_output):\r\n        \"\"\"GPU version of backward function.\r\n\r\n        Args:\r\n            grad_output (Tensor): Gradient\r\n                of output tensor of forward.\r\n\r\n        Returns:\r\n             Tuple[Tensor]: Gradient\r\n                of input tensors in forward.\r\n        \"\"\"\r\n        value, value_spatial_shapes, value_level_start_index, \\\r\n            sampling_locations, attention_weights = ctx.saved_tensors\r\n        grad_value = torch.zeros_like(value)\r\n        grad_sampling_loc = torch.zeros_like(sampling_locations)\r\n        grad_attn_weight = torch.zeros_like(attention_weights)\r\n\r\n        ext_module.ms_deform_attn_backward(\r\n            value,\r\n            value_spatial_shapes,\r\n            value_level_start_index,\r\n            sampling_locations,\r\n            attention_weights,\r\n            grad_output.contiguous(),\r\n            grad_value,\r\n            grad_sampling_loc,\r\n            grad_attn_weight,\r\n            im2col_step=ctx.im2col_step)\r\n\r\n        return grad_value, None, None, \\\r\n            
grad_sampling_loc, grad_attn_weight, None\r\n\r\n\r\nclass MultiScaleDeformableAttnFunction_fp32(Function):\r\n\r\n    @staticmethod\r\n    @custom_fwd(cast_inputs=torch.float32)\r\n    def forward(ctx, value, value_spatial_shapes, value_level_start_index,\r\n                sampling_locations, attention_weights, im2col_step):\r\n        \"\"\"GPU version of multi-scale deformable attention.\r\n\r\n        Args:\r\n            value (Tensor): The value has shape\r\n                (bs, num_keys, mum_heads, embed_dims//num_heads)\r\n            value_spatial_shapes (Tensor): Spatial shape of\r\n                each feature map, has shape (num_levels, 2),\r\n                last dimension 2 represent (h, w)\r\n            sampling_locations (Tensor): The location of sampling points,\r\n                has shape\r\n                (bs ,num_queries, num_heads, num_levels, num_points, 2),\r\n                the last dimension 2 represent (x, y).\r\n            attention_weights (Tensor): The weight of sampling points used\r\n                when calculate the attention, has shape\r\n                (bs ,num_queries, num_heads, num_levels, num_points),\r\n            im2col_step (Tensor): The step used in image to column.\r\n\r\n        Returns:\r\n            Tensor: has shape (bs, num_queries, embed_dims)\r\n        \"\"\"\r\n\r\n        ctx.im2col_step = im2col_step\r\n        output = ext_module.ms_deform_attn_forward(\r\n            value,\r\n            value_spatial_shapes,\r\n            value_level_start_index,\r\n            sampling_locations,\r\n            attention_weights,\r\n            im2col_step=ctx.im2col_step)\r\n        ctx.save_for_backward(value, value_spatial_shapes,\r\n                              value_level_start_index, sampling_locations,\r\n                              attention_weights)\r\n        return output\r\n\r\n    @staticmethod\r\n    @once_differentiable\r\n    @custom_bwd\r\n    def backward(ctx, grad_output):\r\n        \"\"\"GPU version of backward function.\r\n\r\n        Args:\r\n            grad_output (Tensor): Gradient\r\n                of output tensor of forward.\r\n\r\n        Returns:\r\n             Tuple[Tensor]: Gradient\r\n                of input tensors in forward.\r\n        \"\"\"\r\n        value, value_spatial_shapes, value_level_start_index, \\\r\n            sampling_locations, attention_weights = ctx.saved_tensors\r\n        grad_value = torch.zeros_like(value)\r\n        grad_sampling_loc = torch.zeros_like(sampling_locations)\r\n        grad_attn_weight = torch.zeros_like(attention_weights)\r\n\r\n        ext_module.ms_deform_attn_backward(\r\n            value,\r\n            value_spatial_shapes,\r\n            value_level_start_index,\r\n            sampling_locations,\r\n            attention_weights,\r\n            grad_output.contiguous(),\r\n            grad_value,\r\n            grad_sampling_loc,\r\n            grad_attn_weight,\r\n            im2col_step=ctx.im2col_step)\r\n\r\n        return grad_value, None, None, \\\r\n            grad_sampling_loc, grad_attn_weight, None\r\n"
  },
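The two autograd `Function`s above wrap the `_ext` CUDA kernels. mmcv also ships a pure-PyTorch reference, `multi_scale_deformable_attn_pytorch` (the same routine imported by `spatial_cross_attention_depth.py` below), which is convenient for sanity-checking the tensor shapes given in the docstrings. A small shape check, assuming mmcv is installed; the concrete sizes are made up for illustration:

```python
# Shape sanity-check matching the docstrings above, using mmcv's pure-PyTorch
# reference implementation of multi-scale deformable attention (no CUDA kernel needed).
import torch
from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch

bs, num_heads, dim_per_head = 2, 8, 32
spatial_shapes = torch.tensor([[32, 88], [16, 44]])   # (num_levels, 2) as (h, w)
num_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())
num_queries, num_levels, num_points = 100, 2, 4

value = torch.rand(bs, num_keys, num_heads, dim_per_head)
sampling_locations = torch.rand(bs, num_queries, num_heads, num_levels, num_points, 2)
attention_weights = torch.rand(bs, num_queries, num_heads, num_levels, num_points)
attention_weights = attention_weights / attention_weights.sum((-2, -1), keepdim=True)

out = multi_scale_deformable_attn_pytorch(
    value, spatial_shapes, sampling_locations, attention_weights)
print(out.shape)  # torch.Size([2, 100, 256]) == (bs, num_queries, num_heads * dim_per_head)
```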
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/positional_encoding.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING\nfrom mmcv.runner import BaseModule\n\n\n\n@POSITIONAL_ENCODING.register_module()\nclass CustormLearnedPositionalEncoding(BaseModule):\n    \"\"\"Position embedding with learnable embedding weights.\n\n    Args:\n        num_feats (int): The feature dimension for each position\n            along x-axis or y-axis. The final returned dimension for\n            each position is 2 times this value.\n        row_num_embed (int, optional): The dictionary size of row embeddings.\n            Default 50.\n        col_num_embed (int, optional): The dictionary size of col embeddings.\n            Default 50.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_feats,\n                 row_num_embed=50,\n                 col_num_embed=50,\n                 init_cfg=dict(type='Uniform', layer='Embedding')):\n        super(CustormLearnedPositionalEncoding, self).__init__(init_cfg)\n        self.row_embed = nn.Embedding(row_num_embed, num_feats)\n        self.col_embed = nn.Embedding(col_num_embed, num_feats)\n        self.num_feats = num_feats\n        self.row_num_embed = row_num_embed\n        self.col_num_embed = col_num_embed\n\n    def forward(self, bs, h, w, device):\n        \"\"\"Forward function for `CustormLearnedPositionalEncoding`.\n\n        Args:\n            bs (int): Batch size.\n            h (int): Height of the BEV grid to be encoded.\n            w (int): Width of the BEV grid to be encoded.\n            device (torch.device): Device on which the embedding is created.\n\n        Returns:\n            pos (Tensor): Returned position embedding with shape\n                [bs, num_feats*2, h, w].\n        \"\"\"\n        # h, w = mask.shape[-2:]\n        x = torch.arange(w, device=device)\n        y = torch.arange(h, device=device)\n        x_embed = self.col_embed(x)\n        y_embed = self.row_embed(y)\n        pos = torch.cat(\n            (x_embed.unsqueeze(0).repeat(h, 1, 1), y_embed.unsqueeze(1).repeat(\n                1, w, 1)),\n            dim=-1).permute(2, 0,\n                            1).unsqueeze(0).repeat(bs, 1, 1, 1)\n        return pos\n\n    def __repr__(self):\n        \"\"\"str: a string that describes the module\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(num_feats={self.num_feats}, '\n        repr_str += f'row_num_embed={self.row_num_embed}, '\n        repr_str += f'col_num_embed={self.col_num_embed})'\n        return repr_str\n"
  },
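`CustormLearnedPositionalEncoding.forward` simply looks up a learned column embedding per x index and a row embedding per y index, concatenates them, and broadcasts over the batch. The snippet below reproduces that arithmetic with plain `nn.Embedding` modules (the names and sizes are illustrative, not taken from a config) to make the output layout `[bs, 2 * num_feats, h, w]` concrete:

```python
# Minimal reproduction of the row/col learned positional encoding above (plain PyTorch).
import torch
import torch.nn as nn

num_feats, bs, h, w = 128, 1, 100, 100
row_embed = nn.Embedding(200, num_feats)   # one vector per y (row) index
col_embed = nn.Embedding(200, num_feats)   # one vector per x (col) index

x = torch.arange(w)
y = torch.arange(h)
pos = torch.cat(
    (col_embed(x).unsqueeze(0).repeat(h, 1, 1),    # (h, w, num_feats)
     row_embed(y).unsqueeze(1).repeat(1, w, 1)),   # (h, w, num_feats)
    dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(bs, 1, 1, 1)
print(pos.shape)  # torch.Size([1, 256, 100, 100]) == (bs, 2 * num_feats, h, w)
```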
  {
    "path": "mmdet3d/models/fbbev/view_transformation/backward_projection/bevformer_utils/spatial_cross_attention_depth.py",
    "content": "# Copyright (c) 2022-2023, NVIDIA Corporation & Affiliates. All rights reserved. \r\n# \r\n# This work is made available under the Nvidia Source Code License-NC. \r\n# To view a copy of this license, visit \r\n# https://github.com/NVlabs/FB-BEV/blob/main/LICENSE\r\n\r\nfrom mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch\r\nimport warnings\r\nimport torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\nfrom mmcv.cnn import xavier_init, constant_init\r\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\r\n                                      TRANSFORMER_LAYER,\r\n                                      TRANSFORMER_LAYER_SEQUENCE)\r\nfrom mmcv.cnn.bricks.transformer import build_attention\r\nimport math\r\nfrom mmcv.runner import force_fp32, auto_fp16\r\n\r\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\r\n\r\nfrom mmcv.utils import ext_loader\r\nfrom .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32, \\\r\n    MultiScaleDeformableAttnFunction_fp16\r\next_module = ext_loader.load_ext(\r\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\r\n\r\n\r\n\r\n@ATTENTION.register_module()\r\nclass DA_SpatialCrossAttention(BaseModule):\r\n    \"\"\"An attention module used in BEVFormer.\r\n    Args:\r\n        embed_dims (int): The embedding dimension of Attention.\r\n            Default: 256.\r\n        num_cams (int): The number of cameras\r\n        dropout (float): A Dropout layer on `inp_residual`.\r\n            Default: 0..\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n        deformable_attention: (dict): The config for the deformable attention used in SCA.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims=256,\r\n                 num_cams=6,\r\n                 pc_range=None,\r\n                 dropout=0.1,\r\n                 init_cfg=None,\r\n                 batch_first=False,\r\n                 deformable_attention=dict(\r\n                     type='MSDeformableAttention3D',\r\n                     embed_dims=256,\r\n                     num_levels=4),\r\n                layer_scale=None,\r\n                dbound=None,\r\n                 **kwargs\r\n                 ):\r\n        super(DA_SpatialCrossAttention, self).__init__(init_cfg)\r\n\r\n        self.init_cfg = init_cfg\r\n        self.dropout = nn.Dropout(dropout)\r\n        self.pc_range = pc_range\r\n        self.fp16_enabled = False\r\n        self.deformable_attention = build_attention(deformable_attention)\r\n        self.embed_dims = embed_dims\r\n        self.num_cams = num_cams\r\n        self.dbound = dbound\r\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\r\n        self.batch_first = batch_first\r\n        if layer_scale is not None:\r\n            self.layer_scale =  nn.Parameter(\r\n                layer_scale * torch.ones(embed_dims),\r\n                requires_grad=True)\r\n        else:\r\n            self.layer_scale = None\r\n        self.init_weight()\r\n        self.count = 0\r\n\r\n    def init_weight(self):\r\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\r\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\r\n    \r\n    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points_cam'))\r\n    def forward(self,\r\n                query,\r\n                key,\r\n                value,\r\n                
residual=None,\r\n                query_pos=None,\r\n                key_padding_mask=None,\r\n                reference_points=None,\r\n                spatial_shapes=None,\r\n                reference_points_cam=None,\r\n                level_start_index=None,\r\n                flag='encoder',\r\n                bev_query_depth=None,\r\n                pred_img_depth=None,\r\n                bev_mask=None,\r\n                per_cam_mask_list=None,                \r\n                **kwargs):\r\n        \"\"\"Forward Function of Detr3DCrossAtten.\r\n        Args:\r\n            query (Tensor): Query of Transformer with shape\r\n                (num_query, bs, embed_dims).\r\n            key (Tensor): The key tensor with shape\r\n                `(num_key, bs, embed_dims)`.\r\n            value (Tensor): The value tensor with shape\r\n                `(num_key, bs, embed_dims)`. (B, N, C, H, W)\r\n            residual (Tensor): The tensor used for addition, with the\r\n                same shape as `x`. Default None. If None, `x` will be used.\r\n            query_pos (Tensor): The positional encoding for `query`.\r\n                Default: None.\r\n            key_pos (Tensor): The positional encoding for  `key`. Default\r\n                None.\r\n            reference_points (Tensor):  The normalized reference\r\n                points with shape (bs, num_query, 4),\r\n                all elements is range in [0, 1], top-left (0,0),\r\n                bottom-right (1, 1), including padding area.\r\n                or (N, Length_{query}, num_levels, 4), add\r\n                additional two dimensions is (w, h) to\r\n                form reference boxes.\r\n            key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_key].\r\n            spatial_shapes (Tensor): Spatial shape of features in\r\n                different level. 
With shape  (num_levels, 2),\r\n                last dimension represent (h, w).\r\n            level_start_index (Tensor): The start index of each level.\r\n                A tensor has shape (num_levels) and can be represented\r\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\r\n        Returns:\r\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\r\n        \"\"\"\r\n\r\n        N, B, len_query, Z, _ = bev_query_depth.shape\r\n        B, N, DC, H, W = pred_img_depth.shape\r\n        bev_query_depth = bev_query_depth.permute(1, 0, 2, 3, 4) \r\n        pred_img_depth = pred_img_depth.view(B*N, DC, H, W)\r\n        pred_img_depth = pred_img_depth.flatten(2).permute(0, 2, 1)\r\n\r\n        if key is None:\r\n            key = query\r\n        if value is None:\r\n            value = key\r\n\r\n        if residual is None:\r\n            inp_residual = query\r\n            slots = torch.zeros_like(query)\r\n        if query_pos is not None:\r\n            query = query + query_pos\r\n\r\n        bs, num_query, _ = query.size()\r\n\r\n        D = reference_points_cam.size(3)\r\n        indexes = [[] for _ in range(bs)]\r\n\r\n        if bev_mask is not None:\r\n            per_cam_mask_list_ = per_cam_mask_list & bev_mask[None, :, :, None]\r\n        else:\r\n            per_cam_mask_list_ = per_cam_mask_list\r\n        max_len = 0\r\n        for j in range(bs):\r\n            for i, per_cam_mask in enumerate(per_cam_mask_list_):\r\n                index_query_per_img = per_cam_mask[j].sum(-1).nonzero().squeeze(-1)\r\n                if len(index_query_per_img) == 0:\r\n                    index_query_per_img = per_cam_mask_list[i][j].sum(-1).nonzero().squeeze(-1)[0:1]\r\n                indexes[j].append(index_query_per_img)\r\n                max_len = max(max_len, len(index_query_per_img))\r\n        \r\n\r\n        # each camera only interacts with its corresponding BEV queries. 
This step can  greatly save GPU memory.\r\n        queries_rebatch = query.new_zeros(\r\n            [bs, self.num_cams, max_len, self.embed_dims])\r\n        reference_points_rebatch = reference_points_cam.new_zeros(\r\n            [bs, self.num_cams, max_len, D, 2])\r\n        bev_query_depth_rebatch = reference_points_cam.new_zeros(\r\n            [bs, self.num_cams, max_len, D, 1])\r\n\r\n        for j in range(bs):\r\n            for i, reference_points_per_img in enumerate(reference_points_cam):   \r\n                index_query_per_img = indexes[j][i]\r\n                queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img]\r\n                bev_query_depth_rebatch[j, i, :len(index_query_per_img)] = bev_query_depth[j, i, index_query_per_img]\r\n\r\n                reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img]\r\n\r\n        num_cams, l, bs, embed_dims = key.shape\r\n\r\n        key = key.permute(2, 0, 1, 3).reshape(\r\n            bs * self.num_cams, l, self.embed_dims)\r\n        value = value.permute(2, 0, 1, 3).reshape(\r\n            bs * self.num_cams, l, self.embed_dims)\r\n\r\n\r\n        bev_query_depth_rebatch = (bev_query_depth_rebatch- self.dbound[0])/ self.dbound[2]\r\n        bev_query_depth_rebatch = torch.clip(torch.floor(bev_query_depth_rebatch), 0, DC-1).to(torch.long)\r\n        bev_query_depth_rebatch = F.one_hot(bev_query_depth_rebatch.squeeze(-1),\r\n                                   num_classes=DC)\r\n                                   \r\n        queries = self.deformable_attention(query=queries_rebatch.view(bs*self.num_cams, max_len, self.embed_dims), key=key, value=value,\\\r\n                                            reference_points=reference_points_rebatch.view(bs*self.num_cams, max_len, D, 2), spatial_shapes=spatial_shapes,\\\r\n                                            level_start_index=level_start_index,\\\r\n                                            bev_query_depth=bev_query_depth_rebatch.view(bs*self.num_cams, max_len, D, DC),\\\r\n                                            pred_img_depth=pred_img_depth, \\\r\n                                            ).view(bs, self.num_cams, max_len, self.embed_dims)\r\n                        \r\n        for j in range(bs):\r\n            for i in range(num_cams):\r\n                index_query_per_img = indexes[j][i]\r\n                slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)]\r\n\r\n        count = per_cam_mask_list_.sum(-1) > 0\r\n        count = count.permute(1, 2, 0).sum(-1)\r\n        count = torch.clamp(count, min=1.0)\r\n        slots = slots / count[..., None]\r\n\r\n\r\n        slots = self.output_proj(slots)\r\n        if self.layer_scale is None:\r\n            return self.dropout(slots) + inp_residual\r\n        else:\r\n            return  self.dropout(self.layer_scale * slots) +  inp_residual\r\n\r\n\r\n\r\n\r\n@ATTENTION.register_module()\r\nclass DA_MSDeformableAttention(BaseModule):\r\n    \"\"\"An attention module used in BEVFormer based on Deformable-Detr.\r\n    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.\r\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\r\n    Args:\r\n        embed_dims (int): The embedding dimension of Attention.\r\n            Default: 256.\r\n        num_heads (int): Parallel attention heads. Default: 64.\r\n        num_levels (int): The number of feature map used in\r\n            Attention. 
Default: 4.\r\n        num_points (int): The number of sampling points for\r\n            each query in each head. Default: 4.\r\n        im2col_step (int): The step used in image_to_column.\r\n            Default: 64.\r\n        dropout (float): A Dropout layer on `inp_identity`.\r\n            Default: 0.1.\r\n        batch_first (bool): Key, Query and Value are shape of\r\n            (batch, n, embed_dim)\r\n            or (n, batch, embed_dim). Default to False.\r\n        norm_cfg (dict): Config dict for normalization layer.\r\n            Default: None.\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims=256,\r\n                 num_heads=8,\r\n                 num_levels=4,\r\n                 num_points=8,\r\n                 num_Z_anchors=4,\r\n                 im2col_step=64,\r\n                 dropout=0.1,\r\n                 batch_first=True,\r\n                 disable_deformable=False,\r\n                 norm_cfg=None,\r\n                 init_cfg=None):\r\n        super().__init__(init_cfg)\r\n        if embed_dims % num_heads != 0:\r\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\r\n                             f'but got {embed_dims} and {num_heads}')\r\n        dim_per_head = embed_dims // num_heads\r\n        self.norm_cfg = norm_cfg\r\n        self.batch_first = batch_first\r\n        self.output_proj = None\r\n        self.fp16_enabled = False\r\n        self.disable_deformable = disable_deformable\r\n        self.num_Z_anchors = num_Z_anchors\r\n\r\n        # you'd better set dim_per_head to a power of 2\r\n        # which is more efficient in the CUDA implementation\r\n        def _is_power_of_2(n):\r\n            if (not isinstance(n, int)) or (n < 0):\r\n                raise ValueError(\r\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\r\n                        n, type(n)))\r\n            return (n & (n - 1) == 0) and n != 0\r\n\r\n        if not _is_power_of_2(dim_per_head):\r\n            warnings.warn(\r\n                \"You'd better set embed_dims in \"\r\n                'MultiScaleDeformAttention to make '\r\n                'the dimension of each attention head a power of 2 '\r\n                'which is more efficient in our CUDA implementation.')\r\n\r\n        self.im2col_step = im2col_step\r\n        self.embed_dims = embed_dims\r\n        self.num_levels = num_levels\r\n        self.num_heads = num_heads\r\n        self.num_points = num_points\r\n        self.sampling_offsets = nn.Linear(\r\n            embed_dims, num_heads * num_levels * num_points * 2)\r\n        self.attention_weights = nn.Linear(embed_dims,\r\n                                           num_heads * num_levels * num_points)\r\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\r\n       \r\n        self.init_weights()\r\n\r\n    def init_weights(self):\r\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\r\n        constant_init(self.sampling_offsets, 0.)\r\n        thetas = torch.arange(\r\n            self.num_heads,\r\n            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\r\n        \r\n        self.each_anchor_points = self.num_points // self.num_Z_anchors\r\n\r\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\r\n        grid_init = (grid_init /\r\n                     grid_init.abs().max(-1, keepdim=True)[0]).view(\r\n          
  self.num_heads, 1, 1, 1,\r\n            2).repeat(1, self.num_levels,  self.each_anchor_points, self.num_Z_anchors, 1)\r\n        for i in range(self.each_anchor_points):\r\n            for j in range(self.num_Z_anchors):\r\n                grid_init[:, :, i, j, :] *= i + 1\r\n\r\n        self.sampling_offsets.bias.data = grid_init.view(-1)\r\n        constant_init(self.attention_weights, val=0., bias=0.)\r\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\r\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\r\n        self._is_init = True\r\n\r\n    @force_fp32()\r\n    def forward(self,\r\n                query,\r\n                key=None,\r\n                value=None,\r\n                identity=None,\r\n                query_pos=None,\r\n                key_padding_mask=None,\r\n                reference_points=None,\r\n                spatial_shapes=None,\r\n                level_start_index=None,\r\n                bev_query_depth=None,\r\n                pred_img_depth=None,\r\n               \r\n                **kwargs):\r\n        \"\"\"Forward Function of MultiScaleDeformAttention.\r\n        Args:\r\n            query (Tensor): Query of Transformer with shape\r\n                ( bs, num_query, embed_dims).\r\n            key (Tensor): The key tensor with shape\r\n                `(bs, num_key,  embed_dims)`.\r\n            value (Tensor): The value tensor with shape\r\n                `(bs, num_key,  embed_dims)`.\r\n            identity (Tensor): The tensor used for addition, with the\r\n                same shape as `query`. Default None. If None,\r\n                `query` will be used.\r\n            query_pos (Tensor): The positional encoding for `query`.\r\n                Default: None.\r\n            key_pos (Tensor): The positional encoding for `key`. Default\r\n                None.\r\n            reference_points (Tensor):  The normalized reference\r\n                points with shape (bs, num_query, num_levels, 2),\r\n                all elements is range in [0, 1], top-left (0,0),\r\n                bottom-right (1, 1), including padding area.\r\n                or (N, Length_{query}, num_levels, 4), add\r\n                additional two dimensions is (w, h) to\r\n                form reference boxes.\r\n            key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_key].\r\n            spatial_shapes (Tensor): Spatial shape of features in\r\n                different levels. 
With shape (num_levels, 2),\r\n                last dimension represents (h, w).\r\n            level_start_index (Tensor): The start index of each level.\r\n                A tensor has shape ``(num_levels, )`` and can be represented\r\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\r\n        Returns:\r\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\r\n        \"\"\"\r\n\r\n        if value is None:\r\n            value = query\r\n        if identity is None:\r\n            identity = query\r\n        if query_pos is not None:\r\n            query = query + query_pos\r\n\r\n        if not self.batch_first:\r\n            # change to (bs, num_query ,embed_dims)\r\n            query = query.permute(1, 0, 2)\r\n            value = value.permute(1, 0, 2)\r\n\r\n        bs, num_query, _ = query.shape\r\n        bs, num_value, _ = value.shape\r\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\r\n\r\n        value = self.value_proj(value)\r\n        if key_padding_mask is not None:\r\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\r\n        value = value.view(bs, num_value, self.num_heads, -1)\r\n        sampling_offsets = self.sampling_offsets(query).view(\r\n            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)\r\n        attention_weights = self.attention_weights(query).view(\r\n            bs, num_query, self.num_heads, self.num_levels * self.num_points)\r\n        if self.disable_deformable:\r\n            sampling_offsets = sampling_offsets * 0\r\n            attention_weights = attention_weights * 0\r\n        attention_weights = attention_weights.softmax(-1)\r\n\r\n        attention_weights = attention_weights.view(bs, num_query,\r\n                                                   self.num_heads,\r\n                                                   self.num_levels,\r\n                                                   self.num_points)\r\n\r\n        if reference_points.shape[-1] == 2:\r\n            \"\"\"\r\n            For each BEV query, it owns `num_Z_anchors` in 3D space that having different heights.\r\n            After proejcting, each BEV query has `num_Z_anchors` reference points in each 2D image.\r\n            For each referent point, we sample `num_points` sampling points.\r\n            For `num_Z_anchors` reference points,  it has overall `num_points * num_Z_anchors` sampling points.\r\n            \"\"\"\r\n            offset_normalizer = torch.stack(\r\n                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)\r\n\r\n            bs, num_query, num_Z_anchors, xy = reference_points.shape\r\n            reference_points = reference_points[:, :, None, None, None, :, :]\r\n\r\n            sampling_offsets = sampling_offsets / \\\r\n                offset_normalizer[None, None, None, :, None, :]\r\n            bs, num_query, num_heads, num_levels, num_all_points, xy = sampling_offsets.shape\r\n            sampling_offsets = sampling_offsets.view(\r\n                bs, num_query, num_heads, num_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy)\r\n            sampling_locations = reference_points + sampling_offsets\r\n            bs, num_query, num_heads, num_levels, num_points, num_Z_anchors, xy = sampling_locations.shape\r\n            assert num_all_points == num_points * num_Z_anchors\r\n\r\n            sampling_locations = sampling_locations.view(\r\n                bs, num_query, num_heads, num_levels, num_all_points, 
xy)\r\n\r\n        elif reference_points.shape[-1] == 4:\r\n            assert False\r\n        else:\r\n            raise ValueError(\r\n                f'Last dim of reference_points must be'\r\n                f' 2 or 4, but get {reference_points.shape[-1]} instead.')\r\n\r\n        if torch.cuda.is_available() and value.is_cuda:\r\n            if value.dtype == torch.float16:\r\n                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32\r\n            else:\r\n                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32\r\n            depth_reference_points = reference_points.reshape(bs, num_query * num_Z_anchors, 1, 1, 1, 2).contiguous()\r\n            depth_attention_weights = torch.ones_like(depth_reference_points[...,0]).contiguous()\r\n            depth_weights = MultiScaleDeformableAttnFunction.apply(\r\n                pred_img_depth.unsqueeze(2).contiguous(), spatial_shapes[0:1], level_start_index[0:1], depth_reference_points,\r\n                depth_attention_weights, self.im2col_step).reshape(bs, num_query, num_Z_anchors, -1)\r\n            depth_weights = (depth_weights * bev_query_depth).sum(-1)\r\n            depth_weights = depth_weights.unsqueeze(2).repeat(1,1, num_points, 1).reshape(bs, num_query, num_all_points)\r\n            \r\n            attention_weights = attention_weights * depth_weights[:, :, None, None, :]\r\n            output = MultiScaleDeformableAttnFunction.apply(\r\n                value, spatial_shapes, level_start_index, sampling_locations,\r\n                attention_weights, self.im2col_step)\r\n        else:\r\n            output = multi_scale_deformable_attn_pytorch(\r\n                value, spatial_shapes, sampling_locations, attention_weights)\r\n        if not self.batch_first:\r\n            output = output.permute(1, 0, 2)\r\n        return output\r\n"
  },
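The memory-saving trick in `DA_SpatialCrossAttention.forward` is the per-camera rebatching: for each camera, only the BEV queries whose reference points actually fall into that camera are gathered, padded to the longest per-camera list, and sent through deformable attention. Here is a minimal sketch of just that gather-and-pad step; the mask is random and the sizes are illustrative, whereas the real code derives `per_cam_mask_list` from projected reference points:

```python
# Sketch of the per-camera query rebatching: gather visible BEV queries per camera,
# then pad to the longest list so a single batched attention call can be used.
import torch

bs, num_cams, num_query, dim = 1, 6, 200 * 200, 256
query = torch.randn(bs, num_query, dim)
# per_cam_mask[c, b, q] is True if BEV query q has a reference point inside camera c
per_cam_mask = torch.rand(num_cams, bs, num_query) > 0.8

indexes = [[m[b].nonzero().squeeze(-1) for m in per_cam_mask] for b in range(bs)]
max_len = max(len(idx) for per_batch in indexes for idx in per_batch)

queries_rebatch = query.new_zeros(bs, num_cams, max_len, dim)
for b in range(bs):
    for c in range(num_cams):
        idx = indexes[b][c]
        queries_rebatch[b, c, :len(idx)] = query[b, idx]

# (bs, num_cams, max_len, dim): far smaller than attending every query to every camera
print(queries_rebatch.shape)
```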
  {
    "path": "mmdet3d/models/fbbev/view_transformation/forward_projection/__init__.py",
    "content": "from .view_transformer import LSSViewTransformerFunction, LSSViewTransformerFunction3D\n"
  },
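Both LSS view transformers below start from the same grid bookkeeping: `gen_dx_bx` turns each `(lower_bound, upper_bound, interval)` triple into the voxel size `dx`, the center of the first voxel `bx`, and the voxel count `nx`. A worked example with an illustrative `grid_config` (these numbers are not taken from any config in this repo):

```python
# Worked example of the grid bookkeeping used by the LSS view transformers below.
import torch

def gen_dx_bx(xbound, ybound, zbound):
    # same arithmetic as gen_dx_bx in view_transformer.py
    dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]])
    bx = torch.Tensor([row[0] + row[2] / 2.0 for row in [xbound, ybound, zbound]])
    nx = torch.Tensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, zbound]])
    return dx, bx, nx

grid_config = {'x': [-51.2, 51.2, 0.8], 'y': [-51.2, 51.2, 0.8], 'z': [-5.0, 3.0, 8.0]}
dx, bx, nx = gen_dx_bx(grid_config['x'], grid_config['y'], grid_config['z'])
print(dx)  # tensor([0.8000, 0.8000, 8.0000])   voxel size per axis
print(bx)  # tensor([-50.8000, -50.8000, -1.0000])  center of the first voxel
print(nx)  # tensor([128., 128., 1.])   number of voxels per axis
```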
  {
    "path": "mmdet3d/models/fbbev/view_transformation/forward_projection/view_transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import build_conv_layer\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.cuda.amp.autocast_mode import autocast\nfrom torch.utils.checkpoint import checkpoint\n\nfrom mmdet3d.ops.bev_pool_v2.bev_pool import bev_pool_v2\nfrom mmdet.models.backbones.resnet import BasicBlock\nfrom mmdet3d.models.builder import NECKS\nimport torch.utils.checkpoint as cp\nimport time\n\ndef gen_dx_bx(xbound, ybound, zbound):\n    dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]])\n    bx = torch.Tensor([row[0] + row[2]/2.0 for row in [xbound, ybound, zbound]])\n    nx = torch.Tensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, zbound]])\n    return dx, bx, nx\n\n\n@NECKS.register_module()\nclass LSSViewTransformerFunction(BaseModule):\n    r\"\"\"Lift-Splat-Shoot view transformer.\n\n    Please refer to the `paper <https://arxiv.org/abs/2008.05711>`_\n\n    Args:\n        grid_config (dict): Config of grid alone each axis in format of\n            (lower_bound, upper_bound, interval). axis in {x,y,z,depth}.\n        input_size (tuple(int)): Size of input images in format of (height,\n            width).\n        downsample (int): Down sample factor from the input size to the feature\n            size.\n        in_channels (int): Channels of input feature.\n        out_channels (int): Channels of transformed feature.\n        accelerate (bool): Whether the view transformation is conducted with\n            acceleration. Note: the intrinsic and extrinsic of cameras should\n            be constant when 'accelerate' is set true.\n    \"\"\"\n\n    def __init__(\n        self,\n        grid_config,\n        input_size,\n        downsample=16,\n        accelerate=False,\n        uniform=False,\n        with_cp=False\n    ):\n        super(LSSViewTransformerFunction, self).__init__()\n        self.uniform = uniform\n        self.with_cp = with_cp\n        self.grid_config = grid_config\n        self.downsample = downsample\n        self.create_grid_infos(**grid_config)\n        dx, bx, nx = gen_dx_bx(self.grid_config['x'],\n                               self.grid_config['y'],\n                               self.grid_config['z'],\n                               )\n        self.dx = nn.Parameter(dx, requires_grad=False)\n        self.bx = nn.Parameter(bx, requires_grad=False)\n        self.nx = nn.Parameter(nx, requires_grad=False)\n        self.create_frustum(grid_config['depth'], input_size, downsample)\n        self.accelerate = accelerate\n        self.initial_flag = True\n\n    def create_grid_infos(self, x, y, z, **kwargs):\n        \"\"\"Generate the grid information including the lower bound, interval,\n        and size.\n\n        Args:\n            x (tuple(float)): Config of grid alone x axis in format of\n                (lower_bound, upper_bound, interval).\n            y (tuple(float)): Config of grid alone y axis in format of\n                (lower_bound, upper_bound, interval).\n            z (tuple(float)): Config of grid alone z axis in format of\n                (lower_bound, upper_bound, interval).\n            **kwargs: Container for other potential parameters\n        \"\"\"\n        self.grid_lower_bound = torch.Tensor([cfg[0] for cfg in [x, y, z]])\n        self.grid_interval = torch.Tensor([cfg[2] for cfg in [x, y, z]])\n        self.grid_size = torch.Tensor([(cfg[1] - cfg[0]) / cfg[2]\n                          
             for cfg in [x, y, z]])\n\n    def create_frustum(self, depth_cfg, input_size, downsample):\n        \"\"\"Generate the frustum template for each image.\n\n        Args:\n            depth_cfg (tuple(float)): Config of grid alone depth axis in format\n                of (lower_bound, upper_bound, interval).\n            input_size (tuple(int)): Size of input images in format of (height,\n                width).\n            downsample (int): Down sample scale factor from the input size to\n                the feature size.\n        \"\"\"\n        H_in, W_in = input_size\n        H_feat, W_feat = H_in // downsample, W_in // downsample\n        d = torch.arange(*depth_cfg, dtype=torch.float)\\\n            .view(-1, 1, 1).expand(-1, H_feat, W_feat)\n        self.D = d.shape[0]\n        x = torch.linspace(0, W_in - 1, W_feat,  dtype=torch.float)\\\n            .view(1, 1, W_feat).expand(self.D, H_feat, W_feat)\n        y = torch.linspace(0, H_in - 1, H_feat,  dtype=torch.float)\\\n            .view(1, H_feat, 1).expand(self.D, H_feat, W_feat)\n\n        # D x H x W x 3\n        self.frustum = torch.stack((x, y, d), -1)\n\n    def get_lidar_coor(self, rots, trans, cam2imgs, post_rots, post_trans,\n                       bda):\n        \"\"\"Calculate the locations of the frustum points in the lidar\n        coordinate system.\n\n        Args:\n            rots (torch.Tensor): Rotation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3, 3).\n            trans (torch.Tensor): Translation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3).\n            cam2imgs (torch.Tensor): Camera intrinsic matrixes in shape\n                (B, N_cams, 3, 3).\n            post_rots (torch.Tensor): Rotation in camera coordinate system in\n                shape (B, N_cams, 3, 3). 
It is derived from the image view\n                augmentation.\n            post_trans (torch.Tensor): Translation in camera coordinate system\n                derived from image view augmentation in shape (B, N_cams, 3).\n\n        Returns:\n            torch.tensor: Point coordinates in shape\n                (B, N_cams, D, ownsample, 3)\n        \"\"\"\n\n        B, N, _ = trans.shape\n\n        # post-transformation\n        # B x N x D x H x W x 3\n\n        points = self.frustum.to(rots) - post_trans.view(B, N, 1, 1, 1, 3)\n        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\\\n            .matmul(points.unsqueeze(-1))\n\n        # cam_to_ego\n        points = torch.cat(\n            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)\n        combine = rots.matmul(torch.inverse(cam2imgs))\n        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)\n        points += trans.view(B, N, 1, 1, 1, 3)\n        points = bda.view(B, 1, 1, 1, 1, 3,\n                          3).matmul(points.unsqueeze(-1)).squeeze(-1)\n        return points\n\n    def init_acceleration_v2(self, coor):\n        \"\"\"Pre-compute the necessary information in acceleration including the\n        index of points in the final feature.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in lidar space in shape\n                (B, N_cams, D, H, W, 3).\n            x (torch.tensor): Feature of points in shape\n                (B, N_cams, D, H, W, C).\n        \"\"\"\n\n        ranks_bev, ranks_depth, ranks_feat, \\\n            interval_starts, interval_lengths = \\\n            self.voxel_pooling_prepare_v2(coor)\n\n        self.ranks_bev = ranks_bev.int().contiguous()\n        self.ranks_feat = ranks_feat.int().contiguous()\n        self.ranks_depth = ranks_depth.int().contiguous()\n        self.interval_starts = interval_starts.int().contiguous()\n        self.interval_lengths = interval_lengths.int().contiguous()\n\n    def voxel_pooling_v2(self, coor, depth, feat):\n        ranks_bev, ranks_depth, ranks_feat, \\\n            interval_starts, interval_lengths = \\\n            self.voxel_pooling_prepare_v2(coor)\n        if ranks_feat is None:\n            print('warning ---> no points within the predefined '\n                  'bev receptive field')\n            dummy = torch.zeros(size=[\n                feat.shape[0], feat.shape[2],\n                int(self.grid_size[2]),\n                int(self.grid_size[0]),\n                int(self.grid_size[1])\n            ]).to(feat)\n            dummy = torch.cat(dummy.unbind(dim=2), 1)\n            return dummy\n        feat = feat.permute(0, 1, 3, 4, 2)\n        bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                          int(self.grid_size[1]), int(self.grid_size[0]),\n                          feat.shape[-1])  # (B, Z, Y, X, C)\n        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                               bev_feat_shape, interval_starts,\n                               interval_lengths)\n        # collapse Z\n        bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)\n        return bev_feat\n\n    def voxel_pooling_prepare_v2(self, coor):\n        \"\"\"Data preparation for voxel pooling.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in the lidar space in\n                shape (B, N, D, H, W, 3).\n\n        Returns:\n            tuple[torch.tensor]: Rank of the voxel that a point is belong to\n                
in shape (N_Points); Reserved index of points in the depth\n                space in shape (N_Points). Reserved index of points in the\n                feature space in shape (N_Points).\n        \"\"\"\n        B, N, D, H, W, _ = coor.shape\n        num_points = B * N * D * H * W\n        # record the index of selected points for acceleration purpose\n        ranks_depth = torch.range(\n            0, num_points - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = torch.range(\n            0, num_points // D - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)\n        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()\n        # convert coordinate into the voxel space\n        coor = ((coor - self.grid_lower_bound.to(coor)) /\n                self.grid_interval.to(coor))\n        coor = coor.long().view(num_points, 3)\n        batch_idx = torch.range(0, B - 1).reshape(B, 1). \\\n            expand(B, num_points // B).reshape(num_points, 1).to(coor)\n        coor = torch.cat((coor, batch_idx), 1)\n\n        # filter out points that are outside box\n        kept = (coor[:, 0] >= 0) & (coor[:, 0] < self.grid_size[0]) & \\\n               (coor[:, 1] >= 0) & (coor[:, 1] < self.grid_size[1]) & \\\n               (coor[:, 2] >= 0) & (coor[:, 2] < self.grid_size[2])\n        if len(kept) == 0:\n            return None, None, None, None, None\n        coor, ranks_depth, ranks_feat = \\\n            coor[kept], ranks_depth[kept], ranks_feat[kept]\n        # get tensors from the same voxel next to each other\n        ranks_bev = coor[:, 3] * (\n            self.grid_size[2] * self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0]\n        order = ranks_bev.argsort()\n        ranks_bev, ranks_depth, ranks_feat = \\\n            ranks_bev[order], ranks_depth[order], ranks_feat[order]\n\n        kept = torch.ones(\n            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)\n        kept[1:] = ranks_bev[1:] != ranks_bev[:-1]\n        interval_starts = torch.where(kept)[0].int()\n        if len(interval_starts) == 0:\n            return None, None, None, None, None\n        interval_lengths = torch.zeros_like(interval_starts)\n        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]\n        interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]\n        return ranks_bev.int().contiguous(), ranks_depth.int().contiguous(\n        ), ranks_feat.int().contiguous(), interval_starts.int().contiguous(\n        ), interval_lengths.int().contiguous()\n\n    def pre_compute(self, cam_params):\n        if self.initial_flag:\n            coor = self.get_lidar_coor(*cam_params)\n            self.init_acceleration_v2(coor)\n            self.initial_flag = False\n\n    def view_transform_core(self, cam_params, depth, tran_feat):\n       \n\n        # Lift-Splat\n        if self.accelerate:\n            feat = tran_feat # tran_feat.view(B, N, self.out_channels, H, W)\n            feat = feat.permute(0, 1, 3, 4, 2)\n            depth = depth #.view(B, N, self.D, H, W)\n            bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                              int(self.grid_size[1]), int(self.grid_size[0]),\n                              feat.shape[-1])  # (B, Z, Y, X, C)\n            bev_feat = bev_pool_v2(depth, feat, self.ranks_depth,\n                                   
self.ranks_feat, self.ranks_bev,\n                                   bev_feat_shape, self.interval_starts,\n                                   self.interval_lengths)\n\n            bev_feat = bev_feat.squeeze(2)\n        else:\n            coor = self.get_lidar_coor(*cam_params)\n            bev_feat = self.voxel_pooling_v2(\n                coor, depth,\n                tran_feat)\n        return bev_feat\n\n    def view_transform(self, cam_params, depth, tran_feat):\n        if self.accelerate:\n            self.pre_compute(cam_params)\n\n        return self.view_transform_core(cam_params, depth, tran_feat)\n\n    def forward(self, cam_params, context, depth,  **kwargs):\n        \"\"\"Transform image-view feature into bird-eye-view feature.\n\n        Args:\n            cam_params (list(torch.tensor)): of (rots, trans,\n                intrins, post_rots, post_trans)\n\n        Returns:\n            torch.tensor: Bird-eye-view feature in shape (B, C, H_BEV, W_BEV)\n        \"\"\"\n       \n        return self.view_transform(cam_params, depth, context)\n\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        return None\n\n\n\n@NECKS.register_module()\nclass LSSViewTransformerFunction3D(BaseModule):\n    r\"\"\"Lift-Splat-Shoot view transformer.\n\n    Please refer to the `paper <https://arxiv.org/abs/2008.05711>`_\n\n    Args:\n        grid_config (dict): Config of grid alone each axis in format of\n            (lower_bound, upper_bound, interval). axis in {x,y,z,depth}.\n        input_size (tuple(int)): Size of input images in format of (height,\n            width).\n        downsample (int): Down sample factor from the input size to the feature\n            size.\n        in_channels (int): Channels of input feature.\n        out_channels (int): Channels of transformed feature.\n        accelerate (bool): Whether the view transformation is conducted with\n            acceleration. 
Note: the intrinsic and extrinsic of cameras should\n            be constant when 'accelerate' is set true.\n    \"\"\"\n\n    def __init__(\n        self,\n        grid_config,\n        input_size,\n        downsample=16,\n        # in_channels=512,\n        # out_channels=64,\n        accelerate=False,\n        uniform=False,\n        with_cp=False,\n        extra_relu=False,\n    ):\n        super(LSSViewTransformerFunction3D, self).__init__()\n        self.uniform = uniform\n        self.with_cp = with_cp\n        self.extra_relu=extra_relu\n        self.grid_config = grid_config\n        dx, bx, nx = gen_dx_bx(self.grid_config['x'],\n                               self.grid_config['y'],\n                               self.grid_config['z'],\n                               )\n        self.dx = nn.Parameter(dx, requires_grad=False)\n        self.bx = nn.Parameter(bx, requires_grad=False)\n        self.nx = nn.Parameter(nx, requires_grad=False)\n\n        self.downsample = downsample\n        self.create_grid_infos(**grid_config)\n        self.input_size = input_size\n        self.create_frustum(grid_config['depth'], input_size, downsample)\n        # self.out_channels = out_channels\n        # self.in_channels = in_channels\n        # self.depth_net = nn.Conv2d( in_channels, self.D + self.out_channels, kernel_size=1, padding=0)\n        self.accelerate = accelerate\n        self.initial_flag = True\n\n\n    def create_grid_infos(self, x, y, z, **kwargs):\n        \"\"\"Generate the grid information including the lower bound, interval,\n        and size.\n\n        Args:\n            x (tuple(float)): Config of grid alone x axis in format of\n                (lower_bound, upper_bound, interval).\n            y (tuple(float)): Config of grid alone y axis in format of\n                (lower_bound, upper_bound, interval).\n            z (tuple(float)): Config of grid alone z axis in format of\n                (lower_bound, upper_bound, interval).\n            **kwargs: Container for other potential parameters\n        \"\"\"\n        self.grid_lower_bound = torch.Tensor([cfg[0] for cfg in [x, y, z]])\n        self.grid_interval = torch.Tensor([cfg[2] for cfg in [x, y, z]])\n        self.grid_size = torch.Tensor([(cfg[1] - cfg[0]) / cfg[2]\n                                       for cfg in [x, y, z]])\n\n    def create_frustum(self, depth_cfg, input_size, downsample):\n        \"\"\"Generate the frustum template for each image.\n\n        Args:\n            depth_cfg (tuple(float)): Config of grid alone depth axis in format\n                of (lower_bound, upper_bound, interval).\n            input_size (tuple(int)): Size of input images in format of (height,\n                width).\n            downsample (int): Down sample scale factor from the input size to\n                the feature size.\n        \"\"\"\n        H_in, W_in = input_size\n        H_feat, W_feat = H_in // downsample, W_in // downsample\n        d = torch.arange(*depth_cfg, dtype=torch.float)\\\n            .view(-1, 1, 1).expand(-1, H_feat, W_feat)\n        self.D = d.shape[0]\n        x = torch.linspace(0, W_in - 1, W_feat,  dtype=torch.float)\\\n            .view(1, 1, W_feat).expand(self.D, H_feat, W_feat)\n        y = torch.linspace(0, H_in - 1, H_feat,  dtype=torch.float)\\\n            .view(1, H_feat, 1).expand(self.D, H_feat, W_feat)\n\n        # D x H x W x 3\n        self.frustum = torch.stack((x, y, d), -1)\n\n    def get_cam2ego_coor(self, input, downsample=1):\n        depth_cfg = 
self.grid_config['depth']\n        \n        H_in, W_in = self.input_size\n        H_feat, W_feat = H_in // downsample, W_in // downsample\n        d = torch.arange(*depth_cfg, dtype=torch.float)\\\n            .view(-1, 1, 1).expand(-1, H_feat, W_feat)\n        D = d.shape[0]\n        x = torch.linspace(0, W_in - 1, W_feat,  dtype=torch.float)\\\n            .view(1, 1, W_feat).expand(self.D, H_feat, W_feat)\n        y = torch.linspace(0, H_in - 1, H_feat,  dtype=torch.float)\\\n            .view(1, H_feat, 1).expand(self.D, H_feat, W_feat)\n\n        # D x H x W x 3\n        frustum = torch.stack((x, y, d), -1)\n        rots, trans, cam2imgs, post_rots, post_trans, bda = input\n        \n        B, N, _ = trans.shape\n\n        # post-transformation\n        # B x N x D x H x W x 3\n\n        points = frustum.to(rots) - post_trans.view(B, N, 1, 1, 1, 3)\n        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\\\n            .matmul(points.unsqueeze(-1))\n\n        # cam_to_ego\n        points = torch.cat(\n            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)\n        combine = rots.matmul(torch.inverse(cam2imgs))\n        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)\n        points += trans.view(B, N, 1, 1, 1, 3)\n        points = bda.view(B, 1, 1, 1, 1, 3,\n                          3).matmul(points.unsqueeze(-1)).squeeze(-1)\n        coor = points\n        coor = ((coor - self.grid_lower_bound.to(coor)) / 0.4)\n        coor = coor.long()\n        # filter out points that are outside box\n        kept = (coor[..., 0] >= 0) & (coor[..., 0] < 200) & \\\n               (coor[..., 1] >= 0) & (coor[..., 1] < 200) & \\\n               (coor[..., 2] >= 0) & (coor[..., 2] < 16)\n      \n        coor[~kept] = -999\n        return coor\n\n    def get_lidar_coor(self, rots, trans, cam2imgs, post_rots, post_trans,\n                       bda):\n        \"\"\"Calculate the locations of the frustum points in the lidar\n        coordinate system.\n\n        Args:\n            rots (torch.Tensor): Rotation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3, 3).\n            trans (torch.Tensor): Translation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3).\n            cam2imgs (torch.Tensor): Camera intrinsic matrixes in shape\n                (B, N_cams, 3, 3).\n            post_rots (torch.Tensor): Rotation in camera coordinate system in\n                shape (B, N_cams, 3, 3). 
It is derived from the image view\n                augmentation.\n            post_trans (torch.Tensor): Translation in camera coordinate system\n                derived from image view augmentation in shape (B, N_cams, 3).\n\n        Returns:\n            torch.tensor: Point coordinates in shape\n                (B, N_cams, D, ownsample, 3)\n        \"\"\"\n\n        B, N, _ = trans.shape\n\n        # post-transformation\n        # B x N x D x H x W x 3\n\n        points = self.frustum.to(rots) - post_trans.view(B, N, 1, 1, 1, 3)\n        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\\\n            .matmul(points.unsqueeze(-1))\n\n        # cam_to_ego\n        points = torch.cat(\n            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)\n        combine = rots.matmul(torch.inverse(cam2imgs))\n        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)\n        points += trans.view(B, N, 1, 1, 1, 3)\n        points = bda.view(B, 1, 1, 1, 1, 3,\n                          3).matmul(points.unsqueeze(-1)).squeeze(-1)\n        return points\n\n    def init_acceleration_v2(self, coor):\n        \"\"\"Pre-compute the necessary information in acceleration including the\n        index of points in the final feature.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in lidar space in shape\n                (B, N_cams, D, H, W, 3).\n            x (torch.tensor): Feature of points in shape\n                (B, N_cams, D, H, W, C).\n        \"\"\"\n\n        ranks_bev, ranks_depth, ranks_feat, \\\n            interval_starts, interval_lengths = \\\n            self.voxel_pooling_prepare_v2(coor)\n\n        self.ranks_bev = ranks_bev.int().contiguous()\n        self.ranks_feat = ranks_feat.int().contiguous()\n        self.ranks_depth = ranks_depth.int().contiguous()\n        self.interval_starts = interval_starts.int().contiguous()\n        self.interval_lengths = interval_lengths.int().contiguous()\n\n    def voxel_pooling_v2(self, coor, depth, feat):\n        ranks_bev, ranks_depth, ranks_feat, \\\n            interval_starts, interval_lengths = \\\n            self.voxel_pooling_prepare_v2(coor)\n        if ranks_feat is None:\n            print('warning ---> no points within the predefined '\n                  'bev receptive field')\n            dummy = torch.zeros(size=[\n                feat.shape[0], feat.shape[2],\n                int(self.grid_size[0]),\n                int(self.grid_size[1]),\n                int(self.grid_size[2]),\n            ]).to(feat)\n            # dummy = torch.cat(dummy.unbind(dim=2), 1)\n            return dummy\n        feat = feat.permute(0, 1, 3, 4, 2)\n        bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                          int(self.grid_size[1]), int(self.grid_size[0]),\n                          feat.shape[-1])  # (B, Z, Y, X, C)\n        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                               bev_feat_shape, interval_starts,\n                               interval_lengths)\n        bev_feat = bev_feat.permute(0, 1, 3, 4, 2) # B, C, Z, X, Y- > B, C, X, Y, Z\n        # bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)\n        return bev_feat\n\n    def voxel_pooling_prepare_v2(self, coor):\n        \"\"\"Data preparation for voxel pooling.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in the lidar space in\n                shape (B, N, D, H, W, 3).\n\n        Returns:\n            
            tuple[torch.tensor]: Rank of the voxel that each point belongs to\n                in shape (N_Points); reserved index of points in the depth\n                space in shape (N_Points); reserved index of points in the\n                feature space in shape (N_Points).\n        \"\"\"\n        B, N, D, H, W, _ = coor.shape\n        num_points = B * N * D * H * W\n        # record the index of selected points for acceleration purpose\n        ranks_depth = torch.arange(\n            0, num_points, dtype=torch.int, device=coor.device)\n        ranks_feat = torch.arange(\n            0, num_points // D, dtype=torch.int, device=coor.device)\n        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)\n        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()\n        # convert coordinate into the voxel space\n        coor = ((coor - self.grid_lower_bound.to(coor)) /\n                self.grid_interval.to(coor))\n        coor = coor.long().view(num_points, 3)\n        batch_idx = torch.arange(0, B).reshape(B, 1). \\\n            expand(B, num_points // B).reshape(num_points, 1).to(coor)\n        coor = torch.cat((coor, batch_idx), 1)\n\n        # filter out points that are outside the box\n        kept = (coor[:, 0] >= 0) & (coor[:, 0] < self.grid_size[0]) & \\\n               (coor[:, 1] >= 0) & (coor[:, 1] < self.grid_size[1]) & \\\n               (coor[:, 2] >= 0) & (coor[:, 2] < self.grid_size[2])\n        if not kept.any():\n            return None, None, None, None, None\n        coor, ranks_depth, ranks_feat = \\\n            coor[kept], ranks_depth[kept], ranks_feat[kept]\n        # get tensors from the same voxel next to each other\n        ranks_bev = coor[:, 3] * (\n            self.grid_size[2] * self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0]\n        order = ranks_bev.argsort()\n        ranks_bev, ranks_depth, ranks_feat = \\\n            ranks_bev[order], ranks_depth[order], ranks_feat[order]\n\n        kept = torch.ones(\n            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)\n        kept[1:] = ranks_bev[1:] != ranks_bev[:-1]\n        interval_starts = torch.where(kept)[0].int()\n        if len(interval_starts) == 0:\n            return None, None, None, None, None\n        interval_lengths = torch.zeros_like(interval_starts)\n        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]\n        interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]\n        return ranks_bev.int().contiguous(), ranks_depth.int().contiguous(\n        ), ranks_feat.int().contiguous(), interval_starts.int().contiguous(\n        ), interval_lengths.int().contiguous()\n\n    def pre_compute(self, cam_params):\n        if self.initial_flag:\n            coor = self.get_lidar_coor(*cam_params)\n            self.init_acceleration_v2(coor)\n            self.initial_flag = False\n\n    def view_transform_core(self, cam_params, depth, tran_feat):\n        #  B, N, C, H, W = input[0].shape\n\n        # Lift-Splat\n        if self.accelerate:\n            feat = tran_feat  # tran_feat.view(B, N, self.out_channels, H, W)\n            feat = feat.permute(0, 1, 3, 4, 2)\n            depth = depth  # .view(B, N, self.D, H, W)\n            bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                              int(self.grid_size[1]), int(self.grid_size[0]),\n                              feat.shape[-1])  # (B, Z, Y, X, C)\n
            bev_feat = bev_pool_v2(depth, feat, self.ranks_depth,\n                                   self.ranks_feat, self.ranks_bev,\n                                   bev_feat_shape, self.interval_starts,\n                                   self.interval_lengths)\n            bev_feat = bev_feat.squeeze(2)\n        else:\n            coor = self.get_lidar_coor(*cam_params)\n            bev_feat = self.voxel_pooling_v2(\n                coor, depth,\n                tran_feat)\n        return bev_feat\n\n    def view_transform(self, cam_params, depth, tran_feat):\n        if self.accelerate:\n            self.pre_compute(cam_params)\n\n        return self.view_transform_core(cam_params, depth, tran_feat)\n\n    # @run_time('lss3d')\n    def forward(self, cam_params, context, depth, **kwargs):\n        \"\"\"Transform image-view features into a bird-eye-view feature.\n\n        Args:\n            cam_params (tuple[torch.Tensor]): Camera parameters\n                (rots, trans, cam2imgs, post_rots, post_trans, bda).\n            context (torch.Tensor): Image-view feature in shape\n                (B, N_cams, C, H, W).\n            depth (torch.Tensor): Depth distribution in shape\n                (B, N_cams, D, H, W).\n\n        Returns:\n            torch.tensor: Bird-eye-view feature defined on the BEV grid.\n        \"\"\"\n        bev = self.view_transform(cam_params, depth, context)\n        if self.extra_relu:\n            return bev.relu()\n        else:\n            return bev\n\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        return None\n
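\n\n# ---------------------------------------------------------------------------\n# Example usage (an illustrative sketch only; the instance name and the\n# shapes below are assumptions inferred from the methods above, not\n# guarantees of the original code):\n#\n#   cam_params = (rots, trans, cam2imgs, post_rots, post_trans, bda)\n#   context    = per-camera image features, shape (B, N_cams, C, H, W)\n#   depth      = per-camera depth distribution, shape (B, N_cams, D, H, W)\n#\n#   bev = view_transformer(cam_params, context, depth)\n#\n# `bev` is the lifted-and-splatted voxel/BEV feature on the configured grid;\n# see voxel_pooling_v2 above for the exact dimension ordering.\n# ---------------------------------------------------------------------------\n"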
  },
  {
    "path": "mmdet3d/models/fusion_layers/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .coord_transform import (apply_3d_transformation, bbox_2d_transform,\n                              coord_2d_transform)\nfrom .point_fusion import PointFusion\nfrom .vote_fusion import VoteFusion\n\n__all__ = [\n    'PointFusion', 'VoteFusion', 'apply_3d_transformation',\n    'bbox_2d_transform', 'coord_2d_transform'\n]\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/coord_transform.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom functools import partial\n\nimport torch\n\nfrom mmdet3d.core.points import get_points_type\n\n\ndef apply_3d_transformation(pcd, coord_type, img_meta, reverse=False):\n    \"\"\"Apply transformation to input point cloud.\n\n    Args:\n        pcd (torch.Tensor): The point cloud to be transformed.\n        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.\n        img_meta(dict): Meta info regarding data transformation.\n        reverse (bool): Reversed transformation or not.\n\n    Note:\n        The elements in img_meta['transformation_3d_flow']:\n        \"T\" stands for translation;\n        \"S\" stands for scale;\n        \"R\" stands for rotation;\n        \"HF\" stands for horizontal flip;\n        \"VF\" stands for vertical flip.\n\n    Returns:\n        torch.Tensor: The transformed point cloud.\n    \"\"\"\n\n    dtype = pcd.dtype\n    device = pcd.device\n\n    pcd_rotate_mat = (\n        torch.tensor(img_meta['pcd_rotation'], dtype=dtype, device=device)\n        if 'pcd_rotation' in img_meta else torch.eye(\n            3, dtype=dtype, device=device))\n\n    pcd_scale_factor = (\n        img_meta['pcd_scale_factor'] if 'pcd_scale_factor' in img_meta else 1.)\n\n    pcd_trans_factor = (\n        torch.tensor(img_meta['pcd_trans'], dtype=dtype, device=device)\n        if 'pcd_trans' in img_meta else torch.zeros(\n            (3), dtype=dtype, device=device))\n\n    pcd_horizontal_flip = img_meta[\n        'pcd_horizontal_flip'] if 'pcd_horizontal_flip' in \\\n        img_meta else False\n\n    pcd_vertical_flip = img_meta[\n        'pcd_vertical_flip'] if 'pcd_vertical_flip' in \\\n        img_meta else False\n\n    flow = img_meta['transformation_3d_flow'] \\\n        if 'transformation_3d_flow' in img_meta else []\n\n    pcd = pcd.clone()  # prevent inplace modification\n    pcd = get_points_type(coord_type)(pcd)\n\n    horizontal_flip_func = partial(pcd.flip, bev_direction='horizontal') \\\n        if pcd_horizontal_flip else lambda: None\n    vertical_flip_func = partial(pcd.flip, bev_direction='vertical') \\\n        if pcd_vertical_flip else lambda: None\n    if reverse:\n        scale_func = partial(pcd.scale, scale_factor=1.0 / pcd_scale_factor)\n        translate_func = partial(pcd.translate, trans_vector=-pcd_trans_factor)\n        # pcd_rotate_mat @ pcd_rotate_mat.inverse() is not\n        # exactly an identity matrix\n        # use angle to create the inverse rot matrix neither.\n        rotate_func = partial(pcd.rotate, rotation=pcd_rotate_mat.inverse())\n\n        # reverse the pipeline\n        flow = flow[::-1]\n    else:\n        scale_func = partial(pcd.scale, scale_factor=pcd_scale_factor)\n        translate_func = partial(pcd.translate, trans_vector=pcd_trans_factor)\n        rotate_func = partial(pcd.rotate, rotation=pcd_rotate_mat)\n\n    flow_mapping = {\n        'T': translate_func,\n        'S': scale_func,\n        'R': rotate_func,\n        'HF': horizontal_flip_func,\n        'VF': vertical_flip_func\n    }\n    for op in flow:\n        assert op in flow_mapping, f'This 3D data '\\\n            f'transformation op ({op}) is not supported'\n        func = flow_mapping[op]\n        func()\n\n    return pcd.coord\n\n\ndef extract_2d_info(img_meta, tensor):\n    \"\"\"Extract image augmentation information from img_meta.\n\n    Args:\n        img_meta(dict): Meta info regarding data transformation.\n        tensor(torch.Tensor): Input tensor used to create new ones.\n\n    Returns:\n    
    (int, int, int, int, torch.Tensor, bool, torch.Tensor):\n            The extracted information.\n    \"\"\"\n    img_shape = img_meta['img_shape']\n    ori_shape = img_meta['ori_shape']\n    img_h, img_w, _ = img_shape\n    ori_h, ori_w, _ = ori_shape\n\n    img_scale_factor = (\n        tensor.new_tensor(img_meta['scale_factor'][:2])\n        if 'scale_factor' in img_meta else tensor.new_tensor([1.0, 1.0]))\n    img_flip = img_meta['flip'] if 'flip' in img_meta else False\n    img_crop_offset = (\n        tensor.new_tensor(img_meta['img_crop_offset'])\n        if 'img_crop_offset' in img_meta else tensor.new_tensor([0.0, 0.0]))\n\n    return (img_h, img_w, ori_h, ori_w, img_scale_factor, img_flip,\n            img_crop_offset)\n\n\ndef bbox_2d_transform(img_meta, bbox_2d, ori2new):\n    \"\"\"Transform 2d bbox according to img_meta.\n\n    Args:\n        img_meta(dict): Meta info regarding data transformation.\n        bbox_2d (torch.Tensor): Shape (..., >4)\n            The input 2d bboxes to transform.\n        ori2new (bool): Origin img coord system to new or not.\n\n    Returns:\n        torch.Tensor: The transformed 2d bboxes.\n    \"\"\"\n\n    img_h, img_w, ori_h, ori_w, img_scale_factor, img_flip, \\\n        img_crop_offset = extract_2d_info(img_meta, bbox_2d)\n\n    bbox_2d_new = bbox_2d.clone()\n\n    if ori2new:\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] * img_scale_factor[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] * img_scale_factor[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] * img_scale_factor[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] * img_scale_factor[1]\n\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] + img_crop_offset[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] + img_crop_offset[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] + img_crop_offset[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] + img_crop_offset[1]\n\n        if img_flip:\n            bbox_2d_r = img_w - bbox_2d_new[:, 0]\n            bbox_2d_l = img_w - bbox_2d_new[:, 2]\n            bbox_2d_new[:, 0] = bbox_2d_l\n            bbox_2d_new[:, 2] = bbox_2d_r\n    else:\n        if img_flip:\n            bbox_2d_r = img_w - bbox_2d_new[:, 0]\n            bbox_2d_l = img_w - bbox_2d_new[:, 2]\n            bbox_2d_new[:, 0] = bbox_2d_l\n            bbox_2d_new[:, 2] = bbox_2d_r\n\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] - img_crop_offset[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] - img_crop_offset[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] - img_crop_offset[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] - img_crop_offset[1]\n\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] / img_scale_factor[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] / img_scale_factor[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] / img_scale_factor[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] / img_scale_factor[1]\n\n    return bbox_2d_new\n\n\ndef coord_2d_transform(img_meta, coord_2d, ori2new):\n    \"\"\"Transform 2d pixel coordinates according to img_meta.\n\n    Args:\n        img_meta(dict): Meta info regarding data transformation.\n        coord_2d (torch.Tensor): Shape (..., 2)\n            The input 2d coords to transform.\n        ori2new (bool): Origin img coord system to new or not.\n\n    Returns:\n        torch.Tensor: The transformed 2d coordinates.\n    \"\"\"\n\n    img_h, img_w, ori_h, ori_w, img_scale_factor, img_flip, \\\n        img_crop_offset = extract_2d_info(img_meta, coord_2d)\n\n    coord_2d_new = coord_2d.clone()\n\n    if ori2new:\n        # 
TODO here we assume this order of transformation\n        coord_2d_new[..., 0] = coord_2d_new[..., 0] * img_scale_factor[0]\n        coord_2d_new[..., 1] = coord_2d_new[..., 1] * img_scale_factor[1]\n\n        coord_2d_new[..., 0] += img_crop_offset[0]\n        coord_2d_new[..., 1] += img_crop_offset[1]\n\n        # flip uv coordinates and bbox\n        if img_flip:\n            coord_2d_new[..., 0] = img_w - coord_2d_new[..., 0]\n    else:\n        if img_flip:\n            coord_2d_new[..., 0] = img_w - coord_2d_new[..., 0]\n\n        coord_2d_new[..., 0] -= img_crop_offset[0]\n        coord_2d_new[..., 1] -= img_crop_offset[1]\n\n        coord_2d_new[..., 0] = coord_2d_new[..., 0] / img_scale_factor[0]\n        coord_2d_new[..., 1] = coord_2d_new[..., 1] / img_scale_factor[1]\n\n    return coord_2d_new\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/point_fusion.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox.structures import (get_proj_mat_by_coord_type,\n                                          points_cam2img)\nfrom ..builder import FUSION_LAYERS\nfrom . import apply_3d_transformation\n\n\ndef point_sample(img_meta,\n                 img_features,\n                 points,\n                 proj_mat,\n                 coord_type,\n                 img_scale_factor,\n                 img_crop_offset,\n                 img_flip,\n                 img_pad_shape,\n                 img_shape,\n                 aligned=True,\n                 padding_mode='zeros',\n                 align_corners=True):\n    \"\"\"Obtain image features using points.\n\n    Args:\n        img_meta (dict): Meta info.\n        img_features (torch.Tensor): 1 x C x H x W image features.\n        points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.\n        proj_mat (torch.Tensor): 4x4 transformation matrix.\n        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.\n        img_scale_factor (torch.Tensor): Scale factor with shape of\n            (w_scale, h_scale).\n        img_crop_offset (torch.Tensor): Crop offset used to crop\n            image during data augmentation with shape of (w_offset, h_offset).\n        img_flip (bool): Whether the image is flipped.\n        img_pad_shape (tuple[int]): int tuple indicates the h & w after\n            padding, this is necessary to obtain features in feature map.\n        img_shape (tuple[int]): int tuple indicates the h & w before padding\n            after scaling, this is necessary for flipping coordinates.\n        aligned (bool, optional): Whether use bilinear interpolation when\n            sampling image features for each point. Defaults to True.\n        padding_mode (str, optional): Padding mode when padding values for\n            features of out-of-image points. Defaults to 'zeros'.\n        align_corners (bool, optional): Whether to align corners when\n            sampling image features for each point. 
Defaults to True.\n\n    Returns:\n        torch.Tensor: NxC image features sampled by point coordinates.\n    \"\"\"\n\n    # apply transformation based on info in img_meta\n    points = apply_3d_transformation(\n        points, coord_type, img_meta, reverse=True)\n\n    # project points to camera coordinate\n    pts_2d = points_cam2img(points, proj_mat)\n\n    # img transformation: scale -> crop -> flip\n    # the image is resized by img_scale_factor\n    img_coors = pts_2d[:, 0:2] * img_scale_factor  # Nx2\n    img_coors -= img_crop_offset\n\n    # grid sample, the valid grid range should be in [-1,1]\n    coor_x, coor_y = torch.split(img_coors, 1, dim=1)  # each is Nx1\n\n    if img_flip:\n        # by default we take it as horizontal flip\n        # use img_shape before padding for flip\n        orig_h, orig_w = img_shape\n        coor_x = orig_w - coor_x\n\n    h, w = img_pad_shape\n    coor_y = coor_y / h * 2 - 1\n    coor_x = coor_x / w * 2 - 1\n    grid = torch.cat([coor_x, coor_y],\n                     dim=1).unsqueeze(0).unsqueeze(0)  # Nx2 -> 1x1xNx2\n\n    # align_corner=True provides higher performance\n    mode = 'bilinear' if aligned else 'nearest'\n    point_features = F.grid_sample(\n        img_features,\n        grid,\n        mode=mode,\n        padding_mode=padding_mode,\n        align_corners=align_corners)  # 1xCx1xN feats\n\n    return point_features.squeeze().t()\n\n\n@FUSION_LAYERS.register_module()\nclass PointFusion(BaseModule):\n    \"\"\"Fuse image features from multi-scale features.\n\n    Args:\n        img_channels (list[int] | int): Channels of image features.\n            It could be a list if the input is multi-scale image features.\n        pts_channels (int): Channels of point features\n        mid_channels (int): Channels of middle layers\n        out_channels (int): Channels of output fused features\n        img_levels (int, optional): Number of image levels. Defaults to 3.\n        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.\n            Defaults to 'LIDAR'.\n        conv_cfg (dict, optional): Dict config of conv layers of middle\n            layers. Defaults to None.\n        norm_cfg (dict, optional): Dict config of norm layers of middle\n            layers. Defaults to None.\n        act_cfg (dict, optional): Dict config of activatation layers.\n            Defaults to None.\n        activate_out (bool, optional): Whether to apply relu activation\n            to output features. Defaults to True.\n        fuse_out (bool, optional): Whether apply conv layer to the fused\n            features. Defaults to False.\n        dropout_ratio (int, float, optional): Dropout ratio of image\n            features to prevent overfitting. Defaults to 0.\n        aligned (bool, optional): Whether apply aligned feature fusion.\n            Defaults to True.\n        align_corners (bool, optional): Whether to align corner when\n            sampling features according to points. Defaults to True.\n        padding_mode (str, optional): Mode used to pad the features of\n            points that do not have corresponding image features.\n            Defaults to 'zeros'.\n        lateral_conv (bool, optional): Whether to apply lateral convs\n            to image features. 
Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 img_channels,\n                 pts_channels,\n                 mid_channels,\n                 out_channels,\n                 img_levels=3,\n                 coord_type='LIDAR',\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=None,\n                 init_cfg=None,\n                 activate_out=True,\n                 fuse_out=False,\n                 dropout_ratio=0,\n                 aligned=True,\n                 align_corners=True,\n                 padding_mode='zeros',\n                 lateral_conv=True):\n        super(PointFusion, self).__init__(init_cfg=init_cfg)\n        if isinstance(img_levels, int):\n            img_levels = [img_levels]\n        if isinstance(img_channels, int):\n            img_channels = [img_channels] * len(img_levels)\n        assert isinstance(img_levels, list)\n        assert isinstance(img_channels, list)\n        assert len(img_channels) == len(img_levels)\n\n        self.img_levels = img_levels\n        self.coord_type = coord_type\n        self.act_cfg = act_cfg\n        self.activate_out = activate_out\n        self.fuse_out = fuse_out\n        self.dropout_ratio = dropout_ratio\n        self.img_channels = img_channels\n        self.aligned = aligned\n        self.align_corners = align_corners\n        self.padding_mode = padding_mode\n\n        self.lateral_convs = None\n        if lateral_conv:\n            self.lateral_convs = nn.ModuleList()\n            for i in range(len(img_channels)):\n                l_conv = ConvModule(\n                    img_channels[i],\n                    mid_channels,\n                    3,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=self.act_cfg,\n                    inplace=False)\n                self.lateral_convs.append(l_conv)\n            self.img_transform = nn.Sequential(\n                nn.Linear(mid_channels * len(img_channels), out_channels),\n                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n            )\n        else:\n            self.img_transform = nn.Sequential(\n                nn.Linear(sum(img_channels), out_channels),\n                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n            )\n        self.pts_transform = nn.Sequential(\n            nn.Linear(pts_channels, out_channels),\n            nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n        )\n\n        if self.fuse_out:\n            self.fuse_conv = nn.Sequential(\n                nn.Linear(mid_channels, out_channels),\n                # For pts the BN is initialized differently by default\n                # TODO: check whether this is necessary\n                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n                nn.ReLU(inplace=False))\n\n        if init_cfg is None:\n            self.init_cfg = [\n                dict(type='Xavier', layer='Conv2d', distribution='uniform'),\n                dict(type='Xavier', layer='Linear', distribution='uniform')\n            ]\n\n    def forward(self, img_feats, pts, pts_feats, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            img_feats (list[torch.Tensor]): Image features.\n            pts: [list[torch.Tensor]]: A batch of points with shape N x 3.\n            pts_feats (torch.Tensor): A tensor consist of point features of the\n                total batch.\n            img_metas 
(list[dict]): Meta information of images.\n\n        Returns:\n            torch.Tensor: Fused features of each point.\n        \"\"\"\n        img_pts = self.obtain_mlvl_feats(img_feats, pts, img_metas)\n        img_pre_fuse = self.img_transform(img_pts)\n        if self.training and self.dropout_ratio > 0:\n            img_pre_fuse = F.dropout(img_pre_fuse, self.dropout_ratio)\n        pts_pre_fuse = self.pts_transform(pts_feats)\n\n        fuse_out = img_pre_fuse + pts_pre_fuse\n        if self.activate_out:\n            fuse_out = F.relu(fuse_out)\n        if self.fuse_out:\n            fuse_out = self.fuse_conv(fuse_out)\n\n        return fuse_out\n\n    def obtain_mlvl_feats(self, img_feats, pts, img_metas):\n        \"\"\"Obtain multi-level features for each point.\n\n        Args:\n            img_feats (list(torch.Tensor)): Multi-scale image features produced\n                by image backbone in shape (N, C, H, W).\n            pts (list[torch.Tensor]): Points of each sample.\n            img_metas (list[dict]): Meta information for each sample.\n\n        Returns:\n            torch.Tensor: Corresponding image features of each point.\n        \"\"\"\n        if self.lateral_convs is not None:\n            img_ins = [\n                lateral_conv(img_feats[i])\n                for i, lateral_conv in zip(self.img_levels, self.lateral_convs)\n            ]\n        else:\n            img_ins = img_feats\n        img_feats_per_point = []\n        # Sample multi-level features\n        for i in range(len(img_metas)):\n            mlvl_img_feats = []\n            for level in range(len(self.img_levels)):\n                mlvl_img_feats.append(\n                    self.sample_single(img_ins[level][i:i + 1], pts[i][:, :3],\n                                       img_metas[i]))\n            mlvl_img_feats = torch.cat(mlvl_img_feats, dim=-1)\n            img_feats_per_point.append(mlvl_img_feats)\n\n        img_pts = torch.cat(img_feats_per_point, dim=0)\n        return img_pts\n\n    def sample_single(self, img_feats, pts, img_meta):\n        \"\"\"Sample features from single level image feature map.\n\n        Args:\n            img_feats (torch.Tensor): Image feature map in shape\n                (1, C, H, W).\n            pts (torch.Tensor): Points of a single sample.\n            img_meta (dict): Meta information of the single sample.\n\n        Returns:\n            torch.Tensor: Single level image features of each point.\n        \"\"\"\n        # TODO: image transformation also extracted\n        img_scale_factor = (\n            pts.new_tensor(img_meta['scale_factor'][:2])\n            if 'scale_factor' in img_meta.keys() else 1)\n        img_flip = img_meta['flip'] if 'flip' in img_meta.keys() else False\n        img_crop_offset = (\n            pts.new_tensor(img_meta['img_crop_offset'])\n            if 'img_crop_offset' in img_meta.keys() else 0)\n        proj_mat = get_proj_mat_by_coord_type(img_meta, self.coord_type)\n        img_pts = point_sample(\n            img_meta=img_meta,\n            img_features=img_feats,\n            points=pts,\n            proj_mat=pts.new_tensor(proj_mat),\n            coord_type=self.coord_type,\n            img_scale_factor=img_scale_factor,\n            img_crop_offset=img_crop_offset,\n            img_flip=img_flip,\n            img_pad_shape=img_meta['input_shape'][:2],\n            img_shape=img_meta['img_shape'][:2],\n            aligned=self.aligned,\n            padding_mode=self.padding_mode,\n            
align_corners=self.align_corners,\n        )\n        return img_pts\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/vote_fusion.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\n\nfrom mmdet3d.core.bbox import points_cam2img\nfrom ..builder import FUSION_LAYERS\nfrom . import apply_3d_transformation, bbox_2d_transform, coord_2d_transform\n\nEPS = 1e-6\n\n\n@FUSION_LAYERS.register_module()\nclass VoteFusion(nn.Module):\n    \"\"\"Fuse 2d features from 3d seeds.\n\n    Args:\n        num_classes (int): number of classes.\n        max_imvote_per_pixel (int): max number of imvotes.\n    \"\"\"\n\n    def __init__(self, num_classes=10, max_imvote_per_pixel=3):\n        super(VoteFusion, self).__init__()\n        self.num_classes = num_classes\n        self.max_imvote_per_pixel = max_imvote_per_pixel\n\n    def forward(self, imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            imgs (list[torch.Tensor]): Image features.\n            bboxes_2d_rescaled (list[torch.Tensor]): 2D bboxes.\n            seeds_3d_depth (torch.Tensor): 3D seeds.\n            img_metas (list[dict]): Meta information of images.\n\n        Returns:\n            torch.Tensor: Concatenated cues of each point.\n            torch.Tensor: Validity mask of each feature.\n        \"\"\"\n        img_features = []\n        masks = []\n        for i, data in enumerate(\n                zip(imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas)):\n            img, bbox_2d_rescaled, seed_3d_depth, img_meta = data\n            bbox_num = bbox_2d_rescaled.shape[0]\n            seed_num = seed_3d_depth.shape[0]\n\n            img_shape = img_meta['img_shape']\n            img_h, img_w, _ = img_shape\n\n            # first reverse the data transformations\n            xyz_depth = apply_3d_transformation(\n                seed_3d_depth, 'DEPTH', img_meta, reverse=True)\n\n            # project points from depth to image\n            depth2img = xyz_depth.new_tensor(img_meta['depth2img'])\n            uvz_origin = points_cam2img(xyz_depth, depth2img, True)\n            z_cam = uvz_origin[..., 2]\n            uv_origin = (uvz_origin[..., :2] - 1).round()\n\n            # rescale 2d coordinates and bboxes\n            uv_rescaled = coord_2d_transform(img_meta, uv_origin, True)\n            bbox_2d_origin = bbox_2d_transform(img_meta, bbox_2d_rescaled,\n                                               False)\n\n            if bbox_num == 0:\n                imvote_num = seed_num * self.max_imvote_per_pixel\n\n                # use zero features\n                two_cues = torch.zeros((15, imvote_num),\n                                       device=seed_3d_depth.device)\n                mask_zero = torch.zeros(\n                    imvote_num - seed_num, device=seed_3d_depth.device).bool()\n                mask_one = torch.ones(\n                    seed_num, device=seed_3d_depth.device).bool()\n                mask = torch.cat([mask_one, mask_zero], dim=0)\n            else:\n                # expand bboxes and seeds\n                bbox_expanded = bbox_2d_origin.view(1, bbox_num, -1).expand(\n                    seed_num, -1, -1)\n                seed_2d_expanded = uv_origin.view(seed_num, 1,\n                                                  -1).expand(-1, bbox_num, -1)\n                seed_2d_expanded_x, seed_2d_expanded_y = \\\n                    seed_2d_expanded.split(1, dim=-1)\n\n                bbox_expanded_l, bbox_expanded_t, bbox_expanded_r, \\\n                    bbox_expanded_b, bbox_expanded_conf, bbox_expanded_cls = \\\n                 
   bbox_expanded.split(1, dim=-1)\n                bbox_expanded_midx = (bbox_expanded_l + bbox_expanded_r) / 2\n                bbox_expanded_midy = (bbox_expanded_t + bbox_expanded_b) / 2\n\n                seed_2d_in_bbox_x = (seed_2d_expanded_x > bbox_expanded_l) * \\\n                    (seed_2d_expanded_x < bbox_expanded_r)\n                seed_2d_in_bbox_y = (seed_2d_expanded_y > bbox_expanded_t) * \\\n                    (seed_2d_expanded_y < bbox_expanded_b)\n                seed_2d_in_bbox = seed_2d_in_bbox_x * seed_2d_in_bbox_y\n\n                # semantic cues, dim=class_num\n                sem_cue = torch.zeros_like(bbox_expanded_conf).expand(\n                    -1, -1, self.num_classes)\n                sem_cue = sem_cue.scatter(-1, bbox_expanded_cls.long(),\n                                          bbox_expanded_conf)\n\n                # bbox center - uv\n                delta_u = bbox_expanded_midx - seed_2d_expanded_x\n                delta_v = bbox_expanded_midy - seed_2d_expanded_y\n\n                seed_3d_expanded = seed_3d_depth.view(seed_num, 1, -1).expand(\n                    -1, bbox_num, -1)\n\n                z_cam = z_cam.view(seed_num, 1, 1).expand(-1, bbox_num, -1)\n                imvote = torch.cat(\n                    [delta_u, delta_v,\n                     torch.zeros_like(delta_v)], dim=-1).view(-1, 3)\n                imvote = imvote * z_cam.reshape(-1, 1)\n                imvote = imvote @ torch.inverse(depth2img.t())\n\n                # apply transformation to lifted imvotes\n                imvote = apply_3d_transformation(\n                    imvote, 'DEPTH', img_meta, reverse=False)\n\n                seed_3d_expanded = seed_3d_expanded.reshape(imvote.shape)\n\n                # ray angle\n                ray_angle = seed_3d_expanded + imvote\n                ray_angle /= torch.sqrt(torch.sum(ray_angle**2, -1) +\n                                        EPS).unsqueeze(-1)\n\n                # imvote lifted to 3d\n                xz = ray_angle[:, [0, 2]] / (ray_angle[:, [1]] + EPS) \\\n                    * seed_3d_expanded[:, [1]] - seed_3d_expanded[:, [0, 2]]\n\n                # geometric cues, dim=5\n                geo_cue = torch.cat([xz, ray_angle],\n                                    dim=-1).view(seed_num, -1, 5)\n\n                two_cues = torch.cat([geo_cue, sem_cue], dim=-1)\n                # mask to 0 if seed not in bbox\n                two_cues = two_cues * seed_2d_in_bbox.float()\n\n                feature_size = two_cues.shape[-1]\n                # if bbox number is too small, append zeros\n                if bbox_num < self.max_imvote_per_pixel:\n                    append_num = self.max_imvote_per_pixel - bbox_num\n                    append_zeros = torch.zeros(\n                        (seed_num, append_num, 1),\n                        device=seed_2d_in_bbox.device).bool()\n                    seed_2d_in_bbox = torch.cat(\n                        [seed_2d_in_bbox, append_zeros], dim=1)\n                    append_zeros = torch.zeros(\n                        (seed_num, append_num, feature_size),\n                        device=two_cues.device)\n                    two_cues = torch.cat([two_cues, append_zeros], dim=1)\n                    append_zeros = torch.zeros((seed_num, append_num, 1),\n                                               device=two_cues.device)\n                    bbox_expanded_conf = torch.cat(\n                        [bbox_expanded_conf, append_zeros], dim=1)\n\n                # sort the 
valid seed-bbox pair according to confidence\n                pair_score = seed_2d_in_bbox.float() + bbox_expanded_conf\n                # and find the largests\n                mask, indices = pair_score.topk(\n                    self.max_imvote_per_pixel,\n                    dim=1,\n                    largest=True,\n                    sorted=True)\n\n                indices_img = indices.expand(-1, -1, feature_size)\n                two_cues = two_cues.gather(dim=1, index=indices_img)\n                two_cues = two_cues.transpose(1, 0)\n                two_cues = two_cues.reshape(-1, feature_size).transpose(\n                    1, 0).contiguous()\n\n                # since conf is ~ (0, 1), floor gives us validity\n                mask = mask.floor().int()\n                mask = mask.transpose(1, 0).reshape(-1).bool()\n\n            # clear the padding\n            img = img[:, :img_shape[0], :img_shape[1]]\n            img_flatten = img.reshape(3, -1).float()\n            img_flatten /= 255.\n\n            # take the normalized pixel value as texture cue\n            uv_rescaled[:, 0] = torch.clamp(uv_rescaled[:, 0].round(), 0,\n                                            img_shape[1] - 1)\n            uv_rescaled[:, 1] = torch.clamp(uv_rescaled[:, 1].round(), 0,\n                                            img_shape[0] - 1)\n            uv_flatten = uv_rescaled[:, 1].round() * \\\n                img_shape[1] + uv_rescaled[:, 0].round()\n            uv_expanded = uv_flatten.unsqueeze(0).expand(3, -1).long()\n            txt_cue = torch.gather(img_flatten, dim=-1, index=uv_expanded)\n            txt_cue = txt_cue.unsqueeze(1).expand(-1,\n                                                  self.max_imvote_per_pixel,\n                                                  -1).reshape(3, -1)\n\n            # append texture cue\n            img_feature = torch.cat([two_cues, txt_cue], dim=0)\n            img_features.append(img_feature)\n            masks.append(mask)\n\n        return torch.stack(img_features, 0), torch.stack(masks, 0)\n"
  },
  {
    "path": "mmdet3d/models/losses/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy\nfrom .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss\nfrom .chamfer_distance import ChamferDistance, chamfer_distance\nfrom .multibin_loss import MultiBinLoss\nfrom .paconv_regularization_loss import PAConvRegularizationLoss\nfrom .rotated_iou_loss import RotatedIoU3DLoss\nfrom .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss\n\n__all__ = [\n    'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',\n    'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',\n    'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',\n    'MultiBinLoss', 'RotatedIoU3DLoss'\n]\n"
  },
  {
    "path": "mmdet3d/models/losses/axis_aligned_iou_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\n\nfrom mmdet.models.losses.utils import weighted_loss\nfrom ...core.bbox import AxisAlignedBboxOverlaps3D\nfrom ..builder import LOSSES\n\n\n@weighted_loss\ndef axis_aligned_iou_loss(pred, target):\n    \"\"\"Calculate the IoU loss (1-IoU) of two sets of axis aligned bounding\n    boxes. Note that predictions and targets are one-to-one corresponded.\n\n    Args:\n        pred (torch.Tensor): Bbox predictions with shape [..., 6]\n            (x1, y1, z1, x2, y2, z2).\n        target (torch.Tensor): Bbox targets (gt) with shape [..., 6]\n            (x1, y1, z1, x2, y2, z2).\n\n    Returns:\n        torch.Tensor: IoU loss between predictions and targets.\n    \"\"\"\n    axis_aligned_iou = AxisAlignedBboxOverlaps3D()(\n        pred, target, is_aligned=True)\n    iou_loss = 1 - axis_aligned_iou\n    return iou_loss\n\n\n@LOSSES.register_module()\nclass AxisAlignedIoULoss(nn.Module):\n    \"\"\"Calculate the IoU loss (1-IoU) of axis aligned bounding boxes.\n\n    Args:\n        reduction (str): Method to reduce losses.\n            The valid reduction method are none, sum or mean.\n        loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(AxisAlignedIoULoss, self).__init__()\n        assert reduction in ['none', 'sum', 'mean']\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function of loss calculation.\n\n        Args:\n            pred (torch.Tensor): Bbox predictions with shape [..., 6]\n                (x1, y1, z1, x2, y2, z2).\n            target (torch.Tensor): Bbox targets (gt) with shape [..., 6]\n                (x1, y1, z1, x2, y2, z2).\n            weight (torch.Tensor | float, optional): Weight of loss.\n                Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): Method to reduce losses.\n                The valid reduction method are 'none', 'sum' or 'mean'.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: IoU loss between predictions and targets.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if (weight is not None) and (not torch.any(weight > 0)) and (\n                reduction != 'none'):\n            return (pred * weight).sum()\n        return axis_aligned_iou_loss(\n            pred,\n            target,\n            weight=weight,\n            avg_factor=avg_factor,\n            reduction=reduction) * self.loss_weight\n"
  },
  {
    "path": "mmdet3d/models/losses/chamfer_distance.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\nfrom torch.nn.functional import l1_loss, mse_loss, smooth_l1_loss\n\nfrom ..builder import LOSSES\n\n\ndef chamfer_distance(src,\n                     dst,\n                     src_weight=1.0,\n                     dst_weight=1.0,\n                     criterion_mode='l2',\n                     reduction='mean'):\n    \"\"\"Calculate Chamfer Distance of two sets.\n\n    Args:\n        src (torch.Tensor): Source set with shape [B, N, C] to\n            calculate Chamfer Distance.\n        dst (torch.Tensor): Destination set with shape [B, M, C] to\n            calculate Chamfer Distance.\n        src_weight (torch.Tensor or float): Weight of source loss.\n        dst_weight (torch.Tensor or float): Weight of destination loss.\n        criterion_mode (str): Criterion mode to calculate distance.\n            The valid modes are smooth_l1, l1 or l2.\n        reduction (str): Method to reduce losses.\n            The valid reduction method are 'none', 'sum' or 'mean'.\n\n    Returns:\n        tuple: Source and Destination loss with the corresponding indices.\n\n            - loss_src (torch.Tensor): The min distance\n                from source to destination.\n            - loss_dst (torch.Tensor): The min distance\n                from destination to source.\n            - indices1 (torch.Tensor): Index the min distance point\n                for each point in source to destination.\n            - indices2 (torch.Tensor): Index the min distance point\n                for each point in destination to source.\n    \"\"\"\n\n    if criterion_mode == 'smooth_l1':\n        criterion = smooth_l1_loss\n    elif criterion_mode == 'l1':\n        criterion = l1_loss\n    elif criterion_mode == 'l2':\n        criterion = mse_loss\n    else:\n        raise NotImplementedError\n\n    src_expand = src.unsqueeze(2).repeat(1, 1, dst.shape[1], 1)\n    dst_expand = dst.unsqueeze(1).repeat(1, src.shape[1], 1, 1)\n\n    distance = criterion(src_expand, dst_expand, reduction='none').sum(-1)\n    src2dst_distance, indices1 = torch.min(distance, dim=2)  # (B,N)\n    dst2src_distance, indices2 = torch.min(distance, dim=1)  # (B,M)\n\n    loss_src = (src2dst_distance * src_weight)\n    loss_dst = (dst2src_distance * dst_weight)\n\n    if reduction == 'sum':\n        loss_src = torch.sum(loss_src)\n        loss_dst = torch.sum(loss_dst)\n    elif reduction == 'mean':\n        loss_src = torch.mean(loss_src)\n        loss_dst = torch.mean(loss_dst)\n    elif reduction == 'none':\n        pass\n    else:\n        raise NotImplementedError\n\n    return loss_src, loss_dst, indices1, indices2\n\n\n@LOSSES.register_module()\nclass ChamferDistance(nn.Module):\n    \"\"\"Calculate Chamfer Distance of two sets.\n\n    Args:\n        mode (str): Criterion mode to calculate distance.\n            The valid modes are smooth_l1, l1 or l2.\n        reduction (str): Method to reduce losses.\n            The valid reduction method are none, sum or mean.\n        loss_src_weight (float): Weight of loss_source.\n        loss_dst_weight (float): Weight of loss_target.\n    \"\"\"\n\n    def __init__(self,\n                 mode='l2',\n                 reduction='mean',\n                 loss_src_weight=1.0,\n                 loss_dst_weight=1.0):\n        super(ChamferDistance, self).__init__()\n\n        assert mode in ['smooth_l1', 'l1', 'l2']\n        assert reduction in ['none', 'sum', 'mean']\n        self.mode = mode\n      
  self.reduction = reduction\n        self.loss_src_weight = loss_src_weight\n        self.loss_dst_weight = loss_dst_weight\n\n    def forward(self,\n                source,\n                target,\n                src_weight=1.0,\n                dst_weight=1.0,\n                reduction_override=None,\n                return_indices=False,\n                **kwargs):\n        \"\"\"Forward function of loss calculation.\n\n        Args:\n            source (torch.Tensor): Source set with shape [B, N, C] to\n                calculate Chamfer Distance.\n            target (torch.Tensor): Destination set with shape [B, M, C] to\n                calculate Chamfer Distance.\n            src_weight (torch.Tensor | float, optional):\n                Weight of source loss. Defaults to 1.0.\n            dst_weight (torch.Tensor | float, optional):\n                Weight of destination loss. Defaults to 1.0.\n            reduction_override (str, optional): Method to reduce losses.\n                The valid reduction method are 'none', 'sum' or 'mean'.\n                Defaults to None.\n            return_indices (bool, optional): Whether to return indices.\n                Defaults to False.\n\n        Returns:\n            tuple[torch.Tensor]: If ``return_indices=True``, return losses of\n                source and target with their corresponding indices in the\n                order of ``(loss_source, loss_target, indices1, indices2)``.\n                If ``return_indices=False``, return\n                ``(loss_source, loss_target)``.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss_source, loss_target, indices1, indices2 = chamfer_distance(\n            source, target, src_weight, dst_weight, self.mode, reduction)\n\n        loss_source *= self.loss_src_weight\n        loss_target *= self.loss_dst_weight\n\n        if return_indices:\n            return loss_source, loss_target, indices1, indices2\n        else:\n            return loss_source, loss_target\n"
  },
  {
    "path": "mmdet3d/models/losses/multibin_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet.models.losses.utils import weighted_loss\nfrom ..builder import LOSSES\n\n\n@weighted_loss\ndef multibin_loss(pred_orientations, gt_orientations, num_dir_bins=4):\n    \"\"\"Multi-Bin Loss.\n\n    Args:\n        pred_orientations(torch.Tensor): Predicted local vector\n            orientation in [axis_cls, head_cls, sin, cos] format.\n            shape (N, num_dir_bins * 4)\n        gt_orientations(torch.Tensor): Corresponding gt bboxes,\n            shape (N, num_dir_bins * 2).\n        num_dir_bins(int, optional): Number of bins to encode\n            direction angle.\n            Defaults: 4.\n\n    Return:\n        torch.Tensor: Loss tensor.\n    \"\"\"\n    cls_losses = 0\n    reg_losses = 0\n    reg_cnt = 0\n    for i in range(num_dir_bins):\n        # bin cls loss\n        cls_ce_loss = F.cross_entropy(\n            pred_orientations[:, (i * 2):(i * 2 + 2)],\n            gt_orientations[:, i].long(),\n            reduction='mean')\n        # regression loss\n        valid_mask_i = (gt_orientations[:, i] == 1)\n        cls_losses += cls_ce_loss\n        if valid_mask_i.sum() > 0:\n            start = num_dir_bins * 2 + i * 2\n            end = start + 2\n            pred_offset = F.normalize(pred_orientations[valid_mask_i,\n                                                        start:end])\n            gt_offset_sin = torch.sin(gt_orientations[valid_mask_i,\n                                                      num_dir_bins + i])\n            gt_offset_cos = torch.cos(gt_orientations[valid_mask_i,\n                                                      num_dir_bins + i])\n            reg_loss = \\\n                F.l1_loss(pred_offset[:, 0], gt_offset_sin,\n                          reduction='none') + \\\n                F.l1_loss(pred_offset[:, 1], gt_offset_cos,\n                          reduction='none')\n\n            reg_losses += reg_loss.sum()\n            reg_cnt += valid_mask_i.sum()\n\n        return cls_losses / num_dir_bins + reg_losses / reg_cnt\n\n\n@LOSSES.register_module()\nclass MultiBinLoss(nn.Module):\n    \"\"\"Multi-Bin Loss for orientation.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are 'none', 'mean' and 'sum'. Defaults to 'none'.\n        loss_weight (float, optional): The weight of loss. 
            Defaults to 1.0.\n    \"\"\"\n\n    def __init__(self, reduction='none', loss_weight=1.0):\n        super(MultiBinLoss, self).__init__()\n        assert reduction in ['none', 'sum', 'mean']\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self, pred, target, num_dir_bins, reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            num_dir_bins (int): Number of bins to encode direction angle.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: The weighted multi-bin orientation loss.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss = self.loss_weight * multibin_loss(\n            pred, target, num_dir_bins=num_dir_bins, reduction=reduction)\n        return loss\n
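\n# ---------------------------------------------------------------------------\n# Layout note (an illustrative sketch inferred from the indexing in\n# multibin_loss above; the column layout below assumes the default\n# num_dir_bins=4 and is not a guarantee of the original code):\n#   pred:   [bin_0 cls logits (2), ..., bin_3 cls logits (2),\n#            bin_0 (sin, cos), ..., bin_3 (sin, cos)]    -> shape (N, 16)\n#   target: [bin_0 valid flag, ..., bin_3 valid flag,\n#            bin_0 angle, ..., bin_3 angle]              -> shape (N, 8)\n# ---------------------------------------------------------------------------\n"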
  },
  {
    "path": "mmdet3d/models/losses/paconv_regularization_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import PAConv, PAConvCUDA\nfrom mmdet.models.losses.utils import weight_reduce_loss\nfrom ..builder import LOSSES\n\n\ndef weight_correlation(conv):\n    \"\"\"Calculate correlations between kernel weights in Conv's weight bank as\n    regularization loss. The cosine similarity is used as metrics.\n\n    Args:\n        conv (nn.Module): A Conv modules to be regularized.\n            Currently we only support `PAConv` and `PAConvCUDA`.\n\n    Returns:\n        torch.Tensor: Correlations between each kernel weights in weight bank.\n    \"\"\"\n    assert isinstance(conv, (PAConv, PAConvCUDA)), \\\n        f'unsupported module type {type(conv)}'\n    kernels = conv.weight_bank  # [C_in, num_kernels * C_out]\n    in_channels = conv.in_channels\n    out_channels = conv.out_channels\n    num_kernels = conv.num_kernels\n\n    # [num_kernels, Cin * Cout]\n    flatten_kernels = kernels.view(in_channels, num_kernels, out_channels).\\\n        permute(1, 0, 2).reshape(num_kernels, -1)\n    # [num_kernels, num_kernels]\n    inner_product = torch.matmul(flatten_kernels, flatten_kernels.T)\n    # [num_kernels, 1]\n    kernel_norms = torch.sum(flatten_kernels**2, dim=-1, keepdim=True)**0.5\n    # [num_kernels, num_kernels]\n    kernel_norms = torch.matmul(kernel_norms, kernel_norms.T)\n    cosine_sims = inner_product / kernel_norms\n    # take upper triangular part excluding diagonal since we only compute\n    # correlation between different kernels once\n    # the square is to ensure positive loss, refer to:\n    # https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/tool/train.py#L208\n    corr = torch.sum(torch.triu(cosine_sims, diagonal=1)**2)\n\n    return corr\n\n\ndef paconv_regularization_loss(modules, reduction):\n    \"\"\"Computes correlation loss of PAConv weight kernels as regularization.\n\n    Args:\n        modules (List[nn.Module] | :obj:`generator`):\n            A list or a python generator of torch.nn.Modules.\n        reduction (str): Method to reduce losses among PAConv modules.\n            The valid reduction method are none, sum or mean.\n\n    Returns:\n        torch.Tensor: Correlation loss of kernel weights.\n    \"\"\"\n    corr_loss = []\n    for module in modules:\n        if isinstance(module, (PAConv, PAConvCUDA)):\n            corr_loss.append(weight_correlation(module))\n    corr_loss = torch.stack(corr_loss)\n\n    # perform reduction\n    corr_loss = weight_reduce_loss(corr_loss, reduction=reduction)\n\n    return corr_loss\n\n\n@LOSSES.register_module()\nclass PAConvRegularizationLoss(nn.Module):\n    \"\"\"Calculate correlation loss of kernel weights in PAConv's weight bank.\n\n    This is used as a regularization term in PAConv model training.\n\n    Args:\n        reduction (str): Method to reduce losses. The reduction is performed\n            among all PAConv modules instead of prediction tensors.\n            The valid reduction method are none, sum or mean.\n        loss_weight (float, optional): Weight of loss. 
Defaults to 1.0.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(PAConvRegularizationLoss, self).__init__()\n        assert reduction in ['none', 'sum', 'mean']\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self, modules, reduction_override=None, **kwargs):\n        \"\"\"Forward function of loss calculation.\n\n        Args:\n            modules (List[nn.Module] | :obj:`generator`):\n                A list or a python generator of torch.nn.Modules.\n            reduction_override (str, optional): Method to reduce losses.\n                The valid reduction method are 'none', 'sum' or 'mean'.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: Correlation loss of kernel weights.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        return self.loss_weight * paconv_regularization_loss(\n            modules, reduction=reduction)\n"
  },
  {
    "path": "mmdet3d/models/losses/rotated_iou_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import diff_iou_rotated_3d\nfrom torch import nn as nn\n\nfrom mmdet.models.losses.utils import weighted_loss\nfrom ..builder import LOSSES\n\n\n@weighted_loss\ndef rotated_iou_3d_loss(pred, target):\n    \"\"\"Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes.\n    Note that predictions and targets are one-to-one corresponded.\n\n    Args:\n        pred (torch.Tensor): Bbox predictions with shape [N, 7]\n            (x, y, z, w, l, h, alpha).\n        target (torch.Tensor): Bbox targets (gt) with shape [N, 7]\n            (x, y, z, w, l, h, alpha).\n\n    Returns:\n        torch.Tensor: IoU loss between predictions and targets.\n    \"\"\"\n    iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0),\n                                       target.unsqueeze(0))[0]\n    return iou_loss\n\n\n@LOSSES.register_module()\nclass RotatedIoU3DLoss(nn.Module):\n    \"\"\"Calculate the IoU loss (1-IoU) of rotated bounding boxes.\n\n    Args:\n        reduction (str): Method to reduce losses.\n            The valid reduction method are none, sum or mean.\n        loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super().__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function of loss calculation.\n\n        Args:\n            pred (torch.Tensor): Bbox predictions with shape [..., 7]\n                (x, y, z, w, l, h, alpha).\n            target (torch.Tensor): Bbox targets (gt) with shape [..., 7]\n                (x, y, z, w, l, h, alpha).\n            weight (torch.Tensor | float, optional): Weight of loss.\n                Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): Method to reduce losses.\n                The valid reduction method are 'none', 'sum' or 'mean'.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: IoU loss between predictions and targets.\n        \"\"\"\n        if weight is not None and not torch.any(weight > 0):\n            return pred.sum() * weight.sum()  # 0\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if weight is not None and weight.dim() > 1:\n            weight = weight.mean(-1)\n        loss = self.loss_weight * rotated_iou_3d_loss(\n            pred,\n            target,\n            weight,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n\n        return loss\n"
  },
  {
    "path": "mmdet3d/models/losses/uncertain_smooth_l1_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\n\nfrom mmdet.models.losses.utils import weighted_loss\nfrom ..builder import LOSSES\n\n\n@weighted_loss\ndef uncertain_smooth_l1_loss(pred, target, sigma, alpha=1.0, beta=1.0):\n    \"\"\"Smooth L1 loss with uncertainty.\n\n    Args:\n        pred (torch.Tensor): The prediction.\n        target (torch.Tensor): The learning target of the prediction.\n        sigma (torch.Tensor): The sigma for uncertainty.\n        alpha (float, optional): The coefficient of log(sigma).\n            Defaults to 1.0.\n        beta (float, optional): The threshold in the piecewise function.\n            Defaults to 1.0.\n\n    Returns:\n        torch.Tensor: Calculated loss\n    \"\"\"\n    assert beta > 0\n    assert target.numel() > 0\n    assert pred.size() == target.size() == sigma.size(), 'The size of pred ' \\\n        f'{pred.size()}, target {target.size()}, and sigma {sigma.size()} ' \\\n        'are inconsistent.'\n    diff = torch.abs(pred - target)\n    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,\n                       diff - 0.5 * beta)\n    loss = torch.exp(-sigma) * loss + alpha * sigma\n\n    return loss\n\n\n@weighted_loss\ndef uncertain_l1_loss(pred, target, sigma, alpha=1.0):\n    \"\"\"L1 loss with uncertainty.\n\n    Args:\n        pred (torch.Tensor): The prediction.\n        target (torch.Tensor): The learning target of the prediction.\n        sigma (torch.Tensor): The sigma for uncertainty.\n        alpha (float, optional): The coefficient of log(sigma).\n            Defaults to 1.0.\n\n    Returns:\n        torch.Tensor: Calculated loss\n    \"\"\"\n    assert target.numel() > 0\n    assert pred.size() == target.size() == sigma.size(), 'The size of pred ' \\\n        f'{pred.size()}, target {target.size()}, and sigma {sigma.size()} ' \\\n        'are inconsistent.'\n    loss = torch.abs(pred - target)\n    loss = torch.exp(-sigma) * loss + alpha * sigma\n    return loss\n\n\n@LOSSES.register_module()\nclass UncertainSmoothL1Loss(nn.Module):\n    r\"\"\"Smooth L1 loss with uncertainty.\n\n    Please refer to `PGD <https://arxiv.org/abs/2107.14160>`_ and\n    `Multi-Task Learning Using Uncertainty to Weigh Losses for Scene Geometry\n    and Semantics <https://arxiv.org/abs/1705.07115>`_ for more details.\n\n    Args:\n        alpha (float, optional): The coefficient of log(sigma).\n            Defaults to 1.0.\n        beta (float, optional): The threshold in the piecewise function.\n            Defaults to 1.0.\n        reduction (str, optional): The method to reduce the loss.\n            Options are 'none', 'mean' and 'sum'. Defaults to 'mean'.\n        loss_weight (float, optional): The weight of loss. 
Defaults to 1.0\n    \"\"\"\n\n    def __init__(self, alpha=1.0, beta=1.0, reduction='mean', loss_weight=1.0):\n        super(UncertainSmoothL1Loss, self).__init__()\n        assert reduction in ['none', 'sum', 'mean']\n        self.alpha = alpha\n        self.beta = beta\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                sigma,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            sigma (torch.Tensor): The sigma for uncertainty.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_bbox = self.loss_weight * uncertain_smooth_l1_loss(\n            pred,\n            target,\n            weight,\n            sigma=sigma,\n            alpha=self.alpha,\n            beta=self.beta,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss_bbox\n\n\n@LOSSES.register_module()\nclass UncertainL1Loss(nn.Module):\n    \"\"\"L1 loss with uncertainty.\n\n    Args:\n        alpha (float, optional): The coefficient of log(sigma).\n            Defaults to 1.0.\n        reduction (str, optional): The method to reduce the loss.\n            Options are 'none', 'mean' and 'sum'. Defaults to 'mean'.\n        loss_weight (float, optional): The weight of loss. Defaults to 1.0.\n    \"\"\"\n\n    def __init__(self, alpha=1.0, reduction='mean', loss_weight=1.0):\n        super(UncertainL1Loss, self).__init__()\n        assert reduction in ['none', 'sum', 'mean']\n        self.alpha = alpha\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                sigma,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            sigma (torch.Tensor): The sigma for uncertainty.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_bbox = self.loss_weight * uncertain_l1_loss(\n            pred,\n            target,\n            weight,\n            sigma=sigma,\n            alpha=self.alpha,\n            reduction=reduction,\n            avg_factor=avg_factor)\n        return loss_bbox\n"
  },
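Both uncertainty-aware losses above follow the same pattern: the base error is attenuated by `exp(-sigma)` while an `alpha * sigma` penalty keeps the network from predicting arbitrarily large uncertainty (cf. the Kendall et al. reference in the docstring). A minimal, decorator-free sketch of the element-wise math, assuming plain tensors of equal shape:

```python
import torch


def uncertain_smooth_l1(pred, target, sigma, alpha=1.0, beta=1.0):
    """Element-wise smooth L1 attenuated by a learned log-variance `sigma`.

    Sketch of the math used above, without mmdet's @weighted_loss reduction
    machinery: confident predictions (small sigma) keep the full penalty,
    while a large sigma down-weights the error at the cost of alpha * sigma.
    """
    diff = torch.abs(pred - target)
    base = torch.where(diff < beta, 0.5 * diff * diff / beta, diff - 0.5 * beta)
    return torch.exp(-sigma) * base + alpha * sigma


pred = torch.tensor([0.0, 2.0])
target = torch.tensor([0.5, 0.0])
sigma = torch.tensor([0.0, 1.0])  # per-element log-variance
print(uncertain_smooth_l1(pred, target, sigma))  # tensor([0.1250, 1.5518])
```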
  {
    "path": "mmdet3d/models/middle_encoders/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .pillar_scatter import PointPillarsScatter\nfrom .sparse_encoder import SparseEncoder, SparseEncoderSASSD\nfrom .sparse_unet import SparseUNet\n\n__all__ = [\n    'PointPillarsScatter', 'SparseEncoder', 'SparseEncoderSASSD', 'SparseUNet'\n]\n"
  },
  {
    "path": "mmdet3d/models/middle_encoders/pillar_scatter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import auto_fp16\nfrom torch import nn\n\nfrom ..builder import MIDDLE_ENCODERS\n\n\n@MIDDLE_ENCODERS.register_module()\nclass PointPillarsScatter(nn.Module):\n    \"\"\"Point Pillar's Scatter.\n\n    Converts learned features from dense tensor to sparse pseudo image.\n\n    Args:\n        in_channels (int): Channels of input features.\n        output_shape (list[int]): Required output shape of features.\n    \"\"\"\n\n    def __init__(self, in_channels, output_shape):\n        super().__init__()\n        self.output_shape = output_shape\n        self.ny = output_shape[0]\n        self.nx = output_shape[1]\n        self.in_channels = in_channels\n        self.fp16_enabled = False\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size=None):\n        \"\"\"Foraward function to scatter features.\"\"\"\n        # TODO: rewrite the function in a batch manner\n        # no need to deal with different batch cases\n        if batch_size is not None:\n            return self.forward_batch(voxel_features, coors, batch_size)\n        else:\n            return self.forward_single(voxel_features, coors)\n\n    def forward_single(self, voxel_features, coors):\n        \"\"\"Scatter features of single sample.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features in shape (N, C).\n            coors (torch.Tensor): Coordinates of each voxel.\n                The first column indicates the sample ID.\n        \"\"\"\n        # Create the canvas for this sample\n        canvas = torch.zeros(\n            self.in_channels,\n            self.nx * self.ny,\n            dtype=voxel_features.dtype,\n            device=voxel_features.device)\n\n        indices = coors[:, 2] * self.nx + coors[:, 3]\n        indices = indices.long()\n        voxels = voxel_features.t()\n        # Now scatter the blob back to the canvas.\n        canvas[:, indices] = voxels\n        # Undo the column stacking to final 4-dim tensor\n        canvas = canvas.view(1, self.in_channels, self.ny, self.nx)\n        return canvas\n\n    def forward_batch(self, voxel_features, coors, batch_size):\n        \"\"\"Scatter features of single sample.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features in shape (N, C).\n            coors (torch.Tensor): Coordinates of each voxel in shape (N, 4).\n                The first column indicates the sample ID.\n            batch_size (int): Number of samples in the current batch.\n        \"\"\"\n        # batch_canvas will be the final output.\n        batch_canvas = []\n        for batch_itt in range(batch_size):\n            # Create the canvas for this sample\n            canvas = torch.zeros(\n                self.in_channels,\n                self.nx * self.ny,\n                dtype=voxel_features.dtype,\n                device=voxel_features.device)\n\n            # Only include non-empty pillars\n            batch_mask = coors[:, 0] == batch_itt\n            this_coors = coors[batch_mask, :]\n            indices = this_coors[:, 2] * self.nx + this_coors[:, 3]\n            indices = indices.type(torch.long)\n            voxels = voxel_features[batch_mask, :]\n            voxels = voxels.t()\n\n            # Now scatter the blob back to the canvas.\n            canvas[:, indices] = voxels\n\n            # Append to a list for later stacking.\n            batch_canvas.append(canvas)\n\n        # Stack to 3-dim 
tensor (batch-size, in_channels, nrows*ncols)\n        batch_canvas = torch.stack(batch_canvas, 0)\n\n        # Undo the column stacking to final 4-dim tensor\n        batch_canvas = batch_canvas.view(batch_size, self.in_channels, self.ny,\n                                         self.nx)\n\n        return batch_canvas\n"
  },
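`PointPillarsScatter` turns the sparse list of pillar features plus integer `(batch, z, y, x)` coordinates into a dense BEV pseudo-image by writing each pillar at the flattened index `y * nx + x`. A standalone sketch of the `forward_batch` logic (hypothetical helper name, plain PyTorch):

```python
import torch


def scatter_pillars(voxel_features, coors, batch_size, ny, nx):
    """Scatter per-pillar features onto a dense (B, C, ny, nx) BEV canvas.

    Standalone sketch of the forward_batch logic above: `coors` holds
    (batch_idx, z, y, x) per pillar and the flattened index is y * nx + x.
    """
    channels = voxel_features.shape[1]
    canvas = voxel_features.new_zeros(batch_size, channels, ny * nx)
    for b in range(batch_size):
        mask = coors[:, 0] == b
        indices = (coors[mask, 2] * nx + coors[mask, 3]).long()
        canvas[b, :, indices] = voxel_features[mask].t()
    return canvas.view(batch_size, channels, ny, nx)


feats = torch.randn(5, 64)           # 5 non-empty pillars, 64 channels
coors = torch.tensor([[0, 0, 2, 3],  # (batch_idx, z, y, x)
                      [0, 0, 7, 1],
                      [1, 0, 0, 0],
                      [1, 0, 9, 9],
                      [1, 0, 4, 5]])
bev = scatter_pillars(feats, coors, batch_size=2, ny=10, nx=10)
print(bev.shape)  # torch.Size([2, 64, 10, 10])
```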
  {
    "path": "mmdet3d/models/middle_encoders/sparse_encoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import points_in_boxes_all, three_interpolate, three_nn\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule\nfrom mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE\nfrom mmdet.models.losses import sigmoid_focal_loss, smooth_l1_loss\nfrom ..builder import MIDDLE_ENCODERS\n\nif IS_SPCONV2_AVAILABLE:\n    from spconv.pytorch import SparseConvTensor, SparseSequential\nelse:\n    from mmcv.ops import SparseConvTensor, SparseSequential\n\n@MIDDLE_ENCODERS.register_module()\nclass SparseEncoder(nn.Module):\n    r\"\"\"Sparse encoder for SECOND and Part-A2.\n\n    Args:\n        in_channels (int): The number of input channels.\n        sparse_shape (list[int]): The sparse shape of input tensor.\n        order (list[str], optional): Order of conv module.\n            Defaults to ('conv', 'norm', 'act').\n        norm_cfg (dict, optional): Config of normalization layer. Defaults to\n            dict(type='BN1d', eps=1e-3, momentum=0.01).\n        base_channels (int, optional): Out channels for conv_input layer.\n            Defaults to 16.\n        output_channels (int, optional): Out channels for conv_out layer.\n            Defaults to 128.\n        encoder_channels (tuple[tuple[int]], optional):\n            Convolutional channels of each encode block.\n            Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).\n        encoder_paddings (tuple[tuple[int]], optional):\n            Paddings of each encode block.\n            Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).\n        block_type (str, optional): Type of the block to use.\n            Defaults to 'conv_module'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 sparse_shape,\n                 order=('conv', 'norm', 'act'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 base_channels=16,\n                 output_channels=128,\n                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,\n                                                                        64)),\n                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,\n                                                                 1)),\n                 block_type='conv_module'):\n        super().__init__()\n        assert block_type in ['conv_module', 'basicblock']\n        self.sparse_shape = sparse_shape\n        self.in_channels = in_channels\n        self.order = order\n        self.base_channels = base_channels\n        self.output_channels = output_channels\n        self.encoder_channels = encoder_channels\n        self.encoder_paddings = encoder_paddings\n        self.stage_num = len(self.encoder_channels)\n        self.fp16_enabled = False\n        # Spconv init all weight on its own\n\n        assert isinstance(order, tuple) and len(order) == 3\n        assert set(order) == {'conv', 'norm', 'act'}\n\n        if self.order[0] != 'conv':  # pre activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d',\n                order=('conv', ))\n        else:  # post activate\n            self.conv_input = make_sparse_convmodule(\n          
      in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d')\n\n        encoder_out_channels = self.make_encoder_layers(\n            make_sparse_convmodule,\n            norm_cfg,\n            self.base_channels,\n            block_type=block_type)\n\n        self.conv_out = make_sparse_convmodule(\n            encoder_out_channels,\n            self.output_channels,\n            kernel_size=(3, 1, 1),\n            stride=(2, 1, 1),\n            norm_cfg=norm_cfg,\n            padding=0,\n            indice_key='spconv_down2',\n            conv_type='SparseConv3d')\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size):\n        \"\"\"Forward of SparseEncoder.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features in shape (N, C).\n            coors (torch.Tensor): Coordinates in shape (N, 4),\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            batch_size (int): Batch size.\n\n        Returns:\n            dict: Backbone features.\n        \"\"\"\n        coors = coors.int()\n        input_sp_tensor = SparseConvTensor(voxel_features, coors,\n                                           self.sparse_shape, batch_size)\n        x = self.conv_input(input_sp_tensor)\n\n        encode_features = []\n        for encoder_layer in self.encoder_layers:\n            x = encoder_layer(x)\n            encode_features.append(x)\n\n        # for detection head\n        # [200, 176, 5] -> [200, 176, 2]\n        out = self.conv_out(encode_features[-1])\n        spatial_features = out.dense()\n\n        N, C, D, H, W = spatial_features.shape\n        spatial_features = spatial_features.view(N, C * D, H, W)\n\n        return spatial_features\n\n    def make_encoder_layers(self,\n                            make_block,\n                            norm_cfg,\n                            in_channels,\n                            block_type='conv_module',\n                            conv_cfg=dict(type='SubMConv3d')):\n        \"\"\"make encoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n            block_type (str, optional): Type of the block to use.\n                Defaults to 'conv_module'.\n            conv_cfg (dict, optional): Config of conv layer. 
Defaults to\n                dict(type='SubMConv3d').\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        assert block_type in ['conv_module', 'basicblock']\n        self.encoder_layers = SparseSequential()\n\n        for i, blocks in enumerate(self.encoder_channels):\n            blocks_list = []\n            for j, out_channels in enumerate(tuple(blocks)):\n                padding = tuple(self.encoder_paddings[i])[j]\n                # each stage started with a spconv layer\n                # except the first stage\n                if i != 0 and j == 0 and block_type == 'conv_module':\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            stride=2,\n                            padding=padding,\n                            indice_key=f'spconv{i + 1}',\n                            conv_type='SparseConv3d'))\n                elif block_type == 'basicblock':\n                    if j == len(blocks) - 1 and i != len(\n                            self.encoder_channels) - 1:\n                        blocks_list.append(\n                            make_block(\n                                in_channels,\n                                out_channels,\n                                3,\n                                norm_cfg=norm_cfg,\n                                stride=2,\n                                padding=padding,\n                                indice_key=f'spconv{i + 1}',\n                                conv_type='SparseConv3d'))\n                    else:\n                        blocks_list.append(\n                            SparseBasicBlock(\n                                out_channels,\n                                out_channels,\n                                norm_cfg=norm_cfg,\n                                conv_cfg=conv_cfg))\n                else:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            padding=padding,\n                            indice_key=f'subm{i + 1}',\n                            conv_type='SubMConv3d'))\n                in_channels = out_channels\n            stage_name = f'encoder_layer{i + 1}'\n            stage_layers = SparseSequential(*blocks_list)\n            self.encoder_layers.add_module(stage_name, stage_layers)\n        return out_channels\n\n\n@MIDDLE_ENCODERS.register_module()\nclass MySparseEncoder(nn.Module):\n    r\"\"\"Sparse encoder for SECOND and Part-A2.\n\n    Args:\n        in_channels (int): The number of input channels.\n        sparse_shape (list[int]): The sparse shape of input tensor.\n        order (list[str], optional): Order of conv module.\n            Defaults to ('conv', 'norm', 'act').\n        norm_cfg (dict, optional): Config of normalization layer. 
Defaults to\n            dict(type='BN1d', eps=1e-3, momentum=0.01).\n        base_channels (int, optional): Out channels for conv_input layer.\n            Defaults to 16.\n        output_channels (int, optional): Out channels for conv_out layer.\n            Defaults to 128.\n        encoder_channels (tuple[tuple[int]], optional):\n            Convolutional channels of each encode block.\n            Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).\n        encoder_paddings (tuple[tuple[int]], optional):\n            Paddings of each encode block.\n            Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).\n        block_type (str, optional): Type of the block to use.\n            Defaults to 'conv_module'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 sparse_shape,\n                 order=('conv', 'norm', 'act'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 base_channels=16,\n                 output_channels=128,\n                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,\n                                                                        64)),\n                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,\n                                                                 1)),\n                 block_type='conv_module'):\n        super().__init__()\n        assert block_type in ['conv_module', 'basicblock']\n        self.sparse_shape = sparse_shape\n        self.in_channels = in_channels\n        self.order = order\n        self.base_channels = base_channels\n        self.output_channels = output_channels\n        self.encoder_channels = encoder_channels\n        self.encoder_paddings = encoder_paddings\n        self.stage_num = len(self.encoder_channels)\n        self.fp16_enabled = False\n        # Spconv init all weight on its own\n\n        assert isinstance(order, tuple) and len(order) == 3\n        assert set(order) == {'conv', 'norm', 'act'}\n\n        if self.order[0] != 'conv':  # pre activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d',\n                order=('conv', ))\n        else:  # post activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d')\n\n        encoder_out_channels = self.make_encoder_layers(\n            make_sparse_convmodule,\n            norm_cfg,\n            self.base_channels,\n            block_type=block_type)\n\n        self.conv_out = make_sparse_convmodule(\n            encoder_out_channels,\n            self.output_channels,\n            kernel_size=(3, 1, 1),\n            stride=(1, 1, 1),\n            norm_cfg=norm_cfg,\n            padding=(1, 0, 0),\n            indice_key='spconv_down2',\n            conv_type='SparseConv3d')\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size):\n        \"\"\"Forward of SparseEncoder.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features in shape (N, C).\n            coors (torch.Tensor): Coordinates in shape (N, 
4),\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            batch_size (int): Batch size.\n\n        Returns:\n            dict: Backbone features.\n        \"\"\"\n        coors = coors.int()\n        input_sp_tensor = SparseConvTensor(voxel_features, coors,\n                                           self.sparse_shape, batch_size)\n        x = self.conv_input(input_sp_tensor)\n\n        encode_features = []\n        for encoder_layer in self.encoder_layers:\n            x = encoder_layer(x)\n            encode_features.append(x)\n\n        # for detection head\n        # [200, 176, 5] -> [200, 176, 2]\n        out = self.conv_out(encode_features[-1])\n        spatial_features = out.dense()\n\n        N, C, D, H, W = spatial_features.shape\n        spatial_features = spatial_features.permute(0, 1, 3, 4, 2)\n        # spatial_features = spatial_features.view(N, C * D, H, W)\n\n        return spatial_features\n\n    def make_encoder_layers(self,\n                            make_block,\n                            norm_cfg,\n                            in_channels,\n                            block_type='conv_module',\n                            conv_cfg=dict(type='SubMConv3d')):\n        \"\"\"make encoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n            block_type (str, optional): Type of the block to use.\n                Defaults to 'conv_module'.\n            conv_cfg (dict, optional): Config of conv layer. Defaults to\n                dict(type='SubMConv3d').\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        assert block_type in ['conv_module', 'basicblock']\n        self.encoder_layers = SparseSequential()\n\n        for i, blocks in enumerate(self.encoder_channels):\n            blocks_list = []\n            for j, out_channels in enumerate(tuple(blocks)):\n                padding = tuple(self.encoder_paddings[i])[j]\n                # each stage started with a spconv layer\n                # except the first stage\n                if i != 0 and j == 0 and block_type == 'conv_module':\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            stride=2,\n                            padding=padding,\n                            indice_key=f'spconv{i + 1}',\n                            conv_type='SparseConv3d'))\n                elif block_type == 'basicblock':\n                    if j == len(blocks) - 1 and i != len(\n                            self.encoder_channels) - 1:\n                        blocks_list.append(\n                            make_block(\n                                in_channels,\n                                out_channels,\n                                3,\n                                norm_cfg=norm_cfg,\n                                stride=2,\n                                padding=padding,\n                                indice_key=f'spconv{i + 1}',\n                                conv_type='SparseConv3d'))\n                    else:\n                        blocks_list.append(\n                            SparseBasicBlock(\n              
                  out_channels,\n                                out_channels,\n                                norm_cfg=norm_cfg,\n                                conv_cfg=conv_cfg))\n                else:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            padding=padding,\n                            indice_key=f'subm{i + 1}',\n                            conv_type='SubMConv3d'))\n                in_channels = out_channels\n            stage_name = f'encoder_layer{i + 1}'\n            stage_layers = SparseSequential(*blocks_list)\n            self.encoder_layers.add_module(stage_name, stage_layers)\n        return out_channels\n\n\n@MIDDLE_ENCODERS.register_module()\nclass SparseEncoderSASSD(SparseEncoder):\n    r\"\"\"Sparse encoder for `SASSD <https://github.com/skyhehe123/SA-SSD>`_\n\n    Args:\n        in_channels (int): The number of input channels.\n        sparse_shape (list[int]): The sparse shape of input tensor.\n        order (list[str], optional): Order of conv module.\n            Defaults to ('conv', 'norm', 'act').\n        norm_cfg (dict, optional): Config of normalization layer. Defaults to\n            dict(type='BN1d', eps=1e-3, momentum=0.01).\n        base_channels (int, optional): Out channels for conv_input layer.\n            Defaults to 16.\n        output_channels (int, optional): Out channels for conv_out layer.\n            Defaults to 128.\n        encoder_channels (tuple[tuple[int]], optional):\n            Convolutional channels of each encode block.\n            Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).\n        encoder_paddings (tuple[tuple[int]], optional):\n            Paddings of each encode block.\n            Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).\n        block_type (str, optional): Type of the block to use.\n            Defaults to 'conv_module'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 sparse_shape,\n                 order=('conv', 'norm', 'act'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 base_channels=16,\n                 output_channels=128,\n                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,\n                                                                        64)),\n                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,\n                                                                 1)),\n                 block_type='conv_module'):\n        super(SparseEncoderSASSD, self).__init__(\n            in_channels=in_channels,\n            sparse_shape=sparse_shape,\n            order=order,\n            norm_cfg=norm_cfg,\n            base_channels=base_channels,\n            output_channels=output_channels,\n            encoder_channels=encoder_channels,\n            encoder_paddings=encoder_paddings,\n            block_type=block_type)\n\n        self.point_fc = nn.Linear(112, 64, bias=False)\n        self.point_cls = nn.Linear(64, 1, bias=False)\n        self.point_reg = nn.Linear(64, 3, bias=False)\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size, test_mode=False):\n        \"\"\"Forward of SparseEncoder.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features 
in shape (N, C).\n            coors (torch.Tensor): Coordinates in shape (N, 4),\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            batch_size (int): Batch size.\n            test_mode (bool, optional): Whether in test mode.\n                Defaults to False.\n\n        Returns:\n            dict: Backbone features.\n            tuple[torch.Tensor]: Mean feature value of the points,\n                Classificaion result of the points,\n                Regression offsets of the points.\n        \"\"\"\n        coors = coors.int()\n        input_sp_tensor = SparseConvTensor(voxel_features, coors,\n                                           self.sparse_shape, batch_size)\n        x = self.conv_input(input_sp_tensor)\n\n        encode_features = []\n        for encoder_layer in self.encoder_layers:\n            x = encoder_layer(x)\n            encode_features.append(x)\n\n        # for detection head\n        # [200, 176, 5] -> [200, 176, 2]\n        out = self.conv_out(encode_features[-1])\n        spatial_features = out.dense()\n\n        N, C, D, H, W = spatial_features.shape\n        spatial_features = spatial_features.view(N, C * D, H, W)\n\n        if test_mode:\n            return spatial_features, None\n\n        points_mean = torch.zeros_like(voxel_features)\n        points_mean[:, 0] = coors[:, 0]\n        points_mean[:, 1:] = voxel_features[:, :3]\n\n        # auxiliary network\n        p0 = self.make_auxiliary_points(\n            encode_features[0],\n            points_mean,\n            offset=(0, -40., -3.),\n            voxel_size=(.1, .1, .2))\n\n        p1 = self.make_auxiliary_points(\n            encode_features[1],\n            points_mean,\n            offset=(0, -40., -3.),\n            voxel_size=(.2, .2, .4))\n\n        p2 = self.make_auxiliary_points(\n            encode_features[2],\n            points_mean,\n            offset=(0, -40., -3.),\n            voxel_size=(.4, .4, .8))\n\n        pointwise = torch.cat([p0, p1, p2], dim=-1)\n        pointwise = self.point_fc(pointwise)\n        point_cls = self.point_cls(pointwise)\n        point_reg = self.point_reg(pointwise)\n        point_misc = (points_mean, point_cls, point_reg)\n\n        return spatial_features, point_misc\n\n    def get_auxiliary_targets(self, nxyz, gt_boxes3d, enlarge=1.0):\n        \"\"\"Get auxiliary target.\n\n        Args:\n            nxyz (torch.Tensor): Mean features of the points.\n            gt_boxes3d (torch.Tensor): Coordinates in shape (N, 4),\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            enlarge (int, optional): Enlaged scale. 
Defaults to 1.0.\n\n        Returns:\n            tuple[torch.Tensor]: Label of the points and\n                center offsets of the points.\n        \"\"\"\n        center_offsets = list()\n        pts_labels = list()\n        for i in range(len(gt_boxes3d)):\n            boxes3d = gt_boxes3d[i].tensor.cpu()\n            idx = torch.nonzero(nxyz[:, 0] == i).view(-1)\n            new_xyz = nxyz[idx, 1:].cpu()\n\n            boxes3d[:, 3:6] *= enlarge\n\n            pts_in_flag, center_offset = self.calculate_pts_offsets(\n                new_xyz, boxes3d)\n            pts_label = pts_in_flag.max(0)[0].byte()\n            pts_labels.append(pts_label)\n            center_offsets.append(center_offset)\n\n        center_offsets = torch.cat(center_offsets).cuda()\n        pts_labels = torch.cat(pts_labels).to(center_offsets.device)\n\n        return pts_labels, center_offsets\n\n    def calculate_pts_offsets(self, points, boxes):\n        \"\"\"Find all boxes in which each point is, as well as the offsets from\n        the box centers.\n\n        Args:\n            points (torch.Tensor): [M, 3], [x, y, z] in LiDAR/DEPTH coordinate\n            boxes (torch.Tensor): [T, 7],\n                num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],\n                (x, y, z) is the bottom center.\n\n        Returns:\n            tuple[torch.Tensor]: Point indices of boxes with the shape of\n                (T, M). Default background = 0.\n                And offsets from the box centers of points,\n                if it belows to the box, with the shape of (M, 3).\n                Default background = 0.\n        \"\"\"\n        boxes_num = len(boxes)\n        pts_num = len(points)\n        points = points.cuda()\n        boxes = boxes.to(points.device)\n\n        box_idxs_of_pts = points_in_boxes_all(points[None, ...], boxes[None,\n                                                                       ...])\n\n        pts_indices = box_idxs_of_pts.squeeze(0).transpose(0, 1)\n\n        center_offsets = torch.zeros_like(points).to(points.device)\n\n        for i in range(boxes_num):\n            for j in range(pts_num):\n                if pts_indices[i][j] == 1:\n                    center_offsets[j][0] = points[j][0] - boxes[i][0]\n                    center_offsets[j][1] = points[j][1] - boxes[i][1]\n                    center_offsets[j][2] = (\n                        points[j][2] - (boxes[i][2] + boxes[i][2] / 2.0))\n        return pts_indices.cpu(), center_offsets.cpu()\n\n    def aux_loss(self, points, point_cls, point_reg, gt_bboxes):\n        \"\"\"Calculate auxiliary loss.\n\n        Args:\n            points (torch.Tensor): Mean feature value of the points.\n            point_cls (torch.Tensor): Classificaion result of the points.\n            point_reg (torch.Tensor): Regression offsets of the points.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n\n        Returns:\n            dict: Backbone features.\n        \"\"\"\n        num_boxes = len(gt_bboxes)\n\n        pts_labels, center_targets = self.get_auxiliary_targets(\n            points, gt_bboxes)\n\n        rpn_cls_target = pts_labels.long()\n        pos = (pts_labels > 0).float()\n        neg = (pts_labels == 0).float()\n\n        pos_normalizer = pos.sum().clamp(min=1.0)\n\n        cls_weights = pos + neg\n        reg_weights = pos\n        reg_weights = reg_weights / pos_normalizer\n\n        aux_loss_cls = sigmoid_focal_loss(\n            point_cls,\n       
     rpn_cls_target,\n            weight=cls_weights,\n            avg_factor=pos_normalizer)\n\n        aux_loss_cls /= num_boxes\n\n        weight = reg_weights[..., None]\n        aux_loss_reg = smooth_l1_loss(point_reg, center_targets, beta=1 / 9.)\n        aux_loss_reg = torch.sum(aux_loss_reg * weight)[None]\n        aux_loss_reg /= num_boxes\n\n        aux_loss_cls, aux_loss_reg = [aux_loss_cls], [aux_loss_reg]\n\n        return dict(aux_loss_cls=aux_loss_cls, aux_loss_reg=aux_loss_reg)\n\n    def make_auxiliary_points(self,\n                              source_tensor,\n                              target,\n                              offset=(0., -40., -3.),\n                              voxel_size=(.05, .05, .1)):\n        \"\"\"Make auxiliary points for loss computation.\n\n        Args:\n            source_tensor (torch.Tensor): (M, C) features to be propagated.\n            target (torch.Tensor): (N, 4) bxyz positions of the\n                target features.\n            offset (tuple[float], optional): Voxelization offset.\n                Defaults to (0., -40., -3.)\n            voxel_size (tuple[float], optional): Voxelization size.\n                Defaults to (.05, .05, .1)\n\n        Returns:\n            torch.Tensor: (N, C) interpolated features at the target positions.\n        \"\"\"\n        # Transfer sparse tensor indices to point coordinates\n        source = source_tensor.indices.float()\n        offset = torch.Tensor(offset).to(source.device)\n        voxel_size = torch.Tensor(voxel_size).to(source.device)\n        source[:, 1:] = (\n            source[:, [3, 2, 1]] * voxel_size + offset + .5 * voxel_size)\n\n        source_feats = source_tensor.features[None, ...].transpose(1, 2)\n\n        # Interpolate auxiliary points\n        dist, idx = three_nn(target[None, ...], source[None, ...])\n        dist_recip = 1.0 / (dist + 1e-8)\n        norm = torch.sum(dist_recip, dim=2, keepdim=True)\n        weight = dist_recip / norm\n        new_features = three_interpolate(source_feats.contiguous(), idx,\n                                         weight)\n\n        return new_features.squeeze(0).transpose(0, 1)\n"
  },
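The encoders above assemble their stages with `make_encoder_layers`: every stage after the first opens with a stride-2 sparse convolution, and the final `conv_out` compresses the z axis before the dense output is flattened to `(N, C*D, H, W)` for the BEV detection head. Below is a dense `nn.Conv3d` analogue of that staging rule, for illustration only; the real code builds `SubMConv3d`/`SparseConv3d` modules on a `SparseConvTensor`, and the helper name here is hypothetical.

```python
import torch
from torch import nn


def make_dense_encoder(in_channels, encoder_channels):
    """Dense nn.Conv3d analogue of make_encoder_layers (illustrative sketch).

    Mirrors the staging rule above: every stage after the first opens with a
    stride-2 conv, so spatial resolution halves per stage.
    """
    stages = nn.ModuleList()
    for i, blocks in enumerate(encoder_channels):
        layers = []
        for j, out_channels in enumerate(blocks):
            stride = 2 if (i != 0 and j == 0) else 1
            layers += [
                nn.Conv3d(in_channels, out_channels, 3, stride=stride, padding=1),
                nn.BatchNorm3d(out_channels),
                nn.ReLU(inplace=True),
            ]
            in_channels = out_channels
        stages.append(nn.Sequential(*layers))
    return stages


stages = make_dense_encoder(4, ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)))
x = torch.randn(1, 4, 8, 64, 64)  # tiny (N, C, D, H, W) grid for illustration
for stage in stages:
    x = stage(x)
    print(tuple(x.shape))
# The sparse encoder then folds depth into channels for the BEV head:
# N, C, D, H, W = out.shape; out = out.view(N, C * D, H, W)
```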
  {
    "path": "mmdet3d/models/middle_encoders/sparse_unet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE\n\nif IS_SPCONV2_AVAILABLE:\n    from spconv.pytorch import SparseConvTensor, SparseSequential\nelse:\n    from mmcv.ops import SparseConvTensor, SparseSequential\n\nfrom mmcv.runner import BaseModule, auto_fp16\n\nfrom mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule\nfrom mmdet3d.ops.sparse_block import replace_feature\nfrom ..builder import MIDDLE_ENCODERS\n\n\n@MIDDLE_ENCODERS.register_module()\nclass SparseUNet(BaseModule):\n    r\"\"\"SparseUNet for PartA^2.\n\n    See the `paper <https://arxiv.org/abs/1907.03670>`_ for more details.\n\n    Args:\n        in_channels (int): The number of input channels.\n        sparse_shape (list[int]): The sparse shape of input tensor.\n        norm_cfg (dict): Config of normalization layer.\n        base_channels (int): Out channels for conv_input layer.\n        output_channels (int): Out channels for conv_out layer.\n        encoder_channels (tuple[tuple[int]]):\n            Convolutional channels of each encode block.\n        encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.\n        decoder_channels (tuple[tuple[int]]):\n            Convolutional channels of each decode block.\n        decoder_paddings (tuple[tuple[int]]): Paddings of each decode block.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 sparse_shape,\n                 order=('conv', 'norm', 'act'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 base_channels=16,\n                 output_channels=128,\n                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,\n                                                                        64)),\n                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,\n                                                                 1)),\n                 decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),\n                                   (16, 16, 16)),\n                 decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1)),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.sparse_shape = sparse_shape\n        self.in_channels = in_channels\n        self.order = order\n        self.base_channels = base_channels\n        self.output_channels = output_channels\n        self.encoder_channels = encoder_channels\n        self.encoder_paddings = encoder_paddings\n        self.decoder_channels = decoder_channels\n        self.decoder_paddings = decoder_paddings\n        self.stage_num = len(self.encoder_channels)\n        self.fp16_enabled = False\n        # Spconv init all weight on its own\n\n        assert isinstance(order, tuple) and len(order) == 3\n        assert set(order) == {'conv', 'norm', 'act'}\n\n        if self.order[0] != 'conv':  # pre activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d',\n                order=('conv', ))\n        else:  # post activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                
indice_key='subm1',\n                conv_type='SubMConv3d')\n\n        encoder_out_channels = self.make_encoder_layers(\n            make_sparse_convmodule, norm_cfg, self.base_channels)\n        self.make_decoder_layers(make_sparse_convmodule, norm_cfg,\n                                 encoder_out_channels)\n\n        self.conv_out = make_sparse_convmodule(\n            encoder_out_channels,\n            self.output_channels,\n            kernel_size=(3, 1, 1),\n            stride=(2, 1, 1),\n            norm_cfg=norm_cfg,\n            padding=0,\n            indice_key='spconv_down2',\n            conv_type='SparseConv3d')\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size):\n        \"\"\"Forward of SparseUNet.\n\n        Args:\n            voxel_features (torch.float32): Voxel features in shape [N, C].\n            coors (torch.int32): Coordinates in shape [N, 4],\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            batch_size (int): Batch size.\n\n        Returns:\n            dict[str, torch.Tensor]: Backbone features.\n        \"\"\"\n        coors = coors.int()\n        input_sp_tensor = SparseConvTensor(voxel_features, coors,\n                                           self.sparse_shape, batch_size)\n        x = self.conv_input(input_sp_tensor)\n\n        encode_features = []\n        for encoder_layer in self.encoder_layers:\n            x = encoder_layer(x)\n            encode_features.append(x)\n\n        # for detection head\n        # [200, 176, 5] -> [200, 176, 2]\n        out = self.conv_out(encode_features[-1])\n        spatial_features = out.dense()\n\n        N, C, D, H, W = spatial_features.shape\n        spatial_features = spatial_features.view(N, C * D, H, W)\n\n        # for segmentation head, with output shape:\n        # [400, 352, 11] <- [200, 176, 5]\n        # [800, 704, 21] <- [400, 352, 11]\n        # [1600, 1408, 41] <- [800, 704, 21]\n        # [1600, 1408, 41] <- [1600, 1408, 41]\n        decode_features = []\n        x = encode_features[-1]\n        for i in range(self.stage_num, 0, -1):\n            x = self.decoder_layer_forward(encode_features[i - 1], x,\n                                           getattr(self, f'lateral_layer{i}'),\n                                           getattr(self, f'merge_layer{i}'),\n                                           getattr(self, f'upsample_layer{i}'))\n            decode_features.append(x)\n\n        seg_features = decode_features[-1].features\n\n        ret = dict(\n            spatial_features=spatial_features, seg_features=seg_features)\n\n        return ret\n\n    def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer,\n                              merge_layer, upsample_layer):\n        \"\"\"Forward of upsample and residual block.\n\n        Args:\n            x_lateral (:obj:`SparseConvTensor`): Lateral tensor.\n            x_bottom (:obj:`SparseConvTensor`): Feature from bottom layer.\n            lateral_layer (SparseBasicBlock): Convolution for lateral tensor.\n            merge_layer (SparseSequential): Convolution for merging features.\n            upsample_layer (SparseSequential): Convolution for upsampling.\n\n        Returns:\n            :obj:`SparseConvTensor`: Upsampled feature.\n        \"\"\"\n        x = lateral_layer(x_lateral)\n        x = replace_feature(x, torch.cat((x_bottom.features, x.features),\n                                         dim=1))\n        x_merge = merge_layer(x)\n   
     x = self.reduce_channel(x, x_merge.features.shape[1])\n        x = replace_feature(x, x_merge.features + x.features)\n        x = upsample_layer(x)\n        return x\n\n    @staticmethod\n    def reduce_channel(x, out_channels):\n        \"\"\"reduce channel for element-wise addition.\n\n        Args:\n            x (:obj:`SparseConvTensor`): Sparse tensor, ``x.features``\n                are in shape (N, C1).\n            out_channels (int): The number of channel after reduction.\n\n        Returns:\n            :obj:`SparseConvTensor`: Channel reduced feature.\n        \"\"\"\n        features = x.features\n        n, in_channels = features.shape\n        assert (in_channels % out_channels\n                == 0) and (in_channels >= out_channels)\n        x = replace_feature(x, features.view(n, out_channels, -1).sum(dim=2))\n        return x\n\n    def make_encoder_layers(self, make_block, norm_cfg, in_channels):\n        \"\"\"make encoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        self.encoder_layers = SparseSequential()\n\n        for i, blocks in enumerate(self.encoder_channels):\n            blocks_list = []\n            for j, out_channels in enumerate(tuple(blocks)):\n                padding = tuple(self.encoder_paddings[i])[j]\n                # each stage started with a spconv layer\n                # except the first stage\n                if i != 0 and j == 0:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            stride=2,\n                            padding=padding,\n                            indice_key=f'spconv{i + 1}',\n                            conv_type='SparseConv3d'))\n                else:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            padding=padding,\n                            indice_key=f'subm{i + 1}',\n                            conv_type='SubMConv3d'))\n                in_channels = out_channels\n            stage_name = f'encoder_layer{i + 1}'\n            stage_layers = SparseSequential(*blocks_list)\n            self.encoder_layers.add_module(stage_name, stage_layers)\n        return out_channels\n\n    def make_decoder_layers(self, make_block, norm_cfg, in_channels):\n        \"\"\"make decoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        block_num = len(self.decoder_channels)\n        for i, block_channels in enumerate(self.decoder_channels):\n            paddings = self.decoder_paddings[i]\n            setattr(\n                self, f'lateral_layer{block_num - i}',\n                SparseBasicBlock(\n       
             in_channels,\n                    block_channels[0],\n                    conv_cfg=dict(\n                        type='SubMConv3d', indice_key=f'subm{block_num - i}'),\n                    norm_cfg=norm_cfg))\n            setattr(\n                self, f'merge_layer{block_num - i}',\n                make_block(\n                    in_channels * 2,\n                    block_channels[1],\n                    3,\n                    norm_cfg=norm_cfg,\n                    padding=paddings[0],\n                    indice_key=f'subm{block_num - i}',\n                    conv_type='SubMConv3d'))\n            if block_num - i != 1:\n                setattr(\n                    self, f'upsample_layer{block_num - i}',\n                    make_block(\n                        in_channels,\n                        block_channels[2],\n                        3,\n                        norm_cfg=norm_cfg,\n                        indice_key=f'spconv{block_num - i}',\n                        conv_type='SparseInverseConv3d'))\n            else:\n                # use submanifold conv instead of inverse conv\n                # in the last block\n                setattr(\n                    self, f'upsample_layer{block_num - i}',\n                    make_block(\n                        in_channels,\n                        block_channels[2],\n                        3,\n                        norm_cfg=norm_cfg,\n                        padding=paddings[1],\n                        indice_key='subm1',\n                        conv_type='SubMConv3d'))\n            in_channels = block_channels[2]\n"
  },
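In the SparseUNet decoder, `decoder_layer_forward` concatenates the lateral and bottom features, and `reduce_channel` then folds the doubled channel count back down by summing channel groups so the lateral feature can be added residually to the merged one. A dense sketch of that channel-folding step on a plain tensor:

```python
import torch


def reduce_channel(features, out_channels):
    """Channel reduction used by SparseUNet.reduce_channel (dense sketch).

    Folds groups of in_channels / out_channels channels together by summation
    so the result can be added element-wise to a feature with fewer channels.
    """
    n, in_channels = features.shape
    assert in_channels % out_channels == 0 and in_channels >= out_channels
    return features.view(n, out_channels, -1).sum(dim=2)


feats = torch.arange(12.0).view(2, 6)  # (N=2, C_in=6)
print(reduce_channel(feats, 3))        # (2, 3): adjacent channel pairs summed
```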
  {
    "path": "mmdet3d/models/model_utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .edge_fusion_module import EdgeFusionModule\nfrom .transformer import GroupFree3DMHA\nfrom .vote_module import VoteModule\n\n__all__ = ['VoteModule', 'GroupFree3DMHA', 'EdgeFusionModule']\n"
  },
  {
    "path": "mmdet3d/models/model_utils/edge_fusion_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\n\nclass EdgeFusionModule(BaseModule):\n    \"\"\"Edge Fusion Module for feature map.\n\n    Args:\n        out_channels (int): The number of output channels.\n        feat_channels (int): The number of channels in feature map\n            during edge feature fusion.\n        kernel_size (int, optional): Kernel size of convolution.\n            Default: 3.\n        act_cfg (dict, optional): Config of activation.\n            Default: dict(type='ReLU').\n        norm_cfg (dict, optional): Config of normalization.\n            Default: dict(type='BN1d')).\n    \"\"\"\n\n    def __init__(self,\n                 out_channels,\n                 feat_channels,\n                 kernel_size=3,\n                 act_cfg=dict(type='ReLU'),\n                 norm_cfg=dict(type='BN1d')):\n        super().__init__()\n        self.edge_convs = nn.Sequential(\n            ConvModule(\n                feat_channels,\n                feat_channels,\n                kernel_size=kernel_size,\n                padding=kernel_size // 2,\n                conv_cfg=dict(type='Conv1d'),\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg),\n            nn.Conv1d(feat_channels, out_channels, kernel_size=1))\n        self.feat_channels = feat_channels\n\n    def forward(self, features, fused_features, edge_indices, edge_lens,\n                output_h, output_w):\n        \"\"\"Forward pass.\n\n        Args:\n            features (torch.Tensor): Different representative features\n                for fusion.\n            fused_features (torch.Tensor): Different representative\n                features to be fused.\n            edge_indices (torch.Tensor): Batch image edge indices.\n            edge_lens (list[int]): List of edge length of each image.\n            output_h (int): Height of output feature map.\n            output_w (int): Width of output feature map.\n\n        Returns:\n            torch.Tensor: Fused feature maps.\n        \"\"\"\n        batch_size = features.shape[0]\n        # normalize\n        grid_edge_indices = edge_indices.view(batch_size, -1, 1, 2).float()\n        grid_edge_indices[..., 0] = \\\n            grid_edge_indices[..., 0] / (output_w - 1) * 2 - 1\n        grid_edge_indices[..., 1] = \\\n            grid_edge_indices[..., 1] / (output_h - 1) * 2 - 1\n\n        # apply edge fusion\n        edge_features = F.grid_sample(\n            features, grid_edge_indices, align_corners=True).squeeze(-1)\n        edge_output = self.edge_convs(edge_features)\n\n        for k in range(batch_size):\n            edge_indice_k = edge_indices[k, :edge_lens[k]]\n            fused_features[k, :, edge_indice_k[:, 1],\n                           edge_indice_k[:, 0]] += edge_output[\n                               k, :, :edge_lens[k]]\n\n        return fused_features\n"
  },
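`EdgeFusionModule` normalizes integer edge-pixel indices to the `[-1, 1]` range expected by `F.grid_sample`, samples the feature map along the image border, runs the samples through 1D convolutions, and adds the result back at the same pixel locations. The sampling step in isolation (hypothetical helper name, same normalization as in the forward pass above):

```python
import torch
import torch.nn.functional as F


def sample_edge_features(features, edge_indices, output_h, output_w):
    """Gather per-pixel features at integer (x, y) edge locations (sketch).

    Mirrors the normalization in EdgeFusionModule.forward: pixel indices are
    mapped to [-1, 1] so F.grid_sample can read them from the feature map.
    """
    b = features.shape[0]
    grid = edge_indices.view(b, -1, 1, 2).float()
    grid[..., 0] = grid[..., 0] / (output_w - 1) * 2 - 1  # x
    grid[..., 1] = grid[..., 1] / (output_h - 1) * 2 - 1  # y
    # (B, C, num_edge_pixels, 1) -> (B, C, num_edge_pixels)
    return F.grid_sample(features, grid, align_corners=True).squeeze(-1)


feats = torch.randn(1, 8, 24, 32)                    # (B, C, H, W) feature map
edges = torch.tensor([[[0, 0], [31, 0], [31, 23]]])  # (B, K, 2) as (x, y)
print(sample_edge_features(feats, edges, 24, 32).shape)  # torch.Size([1, 8, 3])
```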
  {
    "path": "mmdet3d/models/model_utils/transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn.bricks.registry import ATTENTION\nfrom mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING, MultiheadAttention\nfrom torch import nn as nn\n\n\n@ATTENTION.register_module()\nclass GroupFree3DMHA(MultiheadAttention):\n    \"\"\"A warpper for torch.nn.MultiheadAttention for GroupFree3D.\n\n    This module implements MultiheadAttention with identity connection,\n    and positional encoding used in DETR is also passed as input.\n\n    Args:\n        embed_dims (int): The embedding dimension.\n        num_heads (int): Parallel attention heads. Same as\n            `nn.MultiheadAttention`.\n        attn_drop (float, optional): A Dropout layer on attn_output_weights.\n            Defaults to 0.0.\n        proj_drop (float, optional): A Dropout layer. Defaults to 0.0.\n        dropout_layer (obj:`ConfigDict`, optional): The dropout_layer used\n            when adding the shortcut.\n        init_cfg (obj:`mmcv.ConfigDict`, optional): The Config for\n            initialization. Default: None.\n        batch_first (bool, optional): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 attn_drop=0.,\n                 proj_drop=0.,\n                 dropout_layer=dict(type='DropOut', drop_prob=0.),\n                 init_cfg=None,\n                 batch_first=False,\n                 **kwargs):\n        super().__init__(embed_dims, num_heads, attn_drop, proj_drop,\n                         dropout_layer, init_cfg, batch_first, **kwargs)\n\n    def forward(self,\n                query,\n                key,\n                value,\n                identity,\n                query_pos=None,\n                key_pos=None,\n                attn_mask=None,\n                key_padding_mask=None,\n                **kwargs):\n        \"\"\"Forward function for `GroupFree3DMHA`.\n\n        **kwargs allow passing a more general data flow when combining\n        with other operations in `transformerlayer`.\n\n        Args:\n            query (Tensor): The input query with shape [num_queries, bs,\n                embed_dims]. Same in `nn.MultiheadAttention.forward`.\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims]. Same in `nn.MultiheadAttention.forward`.\n                If None, the ``query`` will be used.\n            value (Tensor): The value tensor with same shape as `key`.\n                Same in `nn.MultiheadAttention.forward`.\n                If None, the `key` will be used.\n            identity (Tensor): This tensor, with the same shape as x,\n                will be used for the identity link. If None, `x` will be used.\n            query_pos (Tensor, optional): The positional encoding for query,\n                with the same shape as `x`. Defaults to None.\n                If not None, it will be added to `x` before forward function.\n            key_pos (Tensor, optional): The positional encoding for `key`,\n                with the same shape as `key`. Defaults to None. If not None,\n                it will be added to `key` before forward function. If None,\n                and `query_pos` has the same shape as `key`, then `query_pos`\n                will be used for `key_pos`. 
Defaults to None.\n            attn_mask (Tensor, optional): ByteTensor mask with shape\n                [num_queries, num_keys].\n                Same in `nn.MultiheadAttention.forward`. Defaults to None.\n            key_padding_mask (Tensor, optional): ByteTensor with shape\n                [bs, num_keys]. Same in `nn.MultiheadAttention.forward`.\n                Defaults to None.\n\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        if hasattr(self, 'operation_name'):\n            if self.operation_name == 'self_attn':\n                value = value + query_pos\n            elif self.operation_name == 'cross_attn':\n                value = value + key_pos\n            else:\n                raise NotImplementedError(\n                    f'{self.__class__.__name__} '\n                    f\"can't be used as {self.operation_name}\")\n        else:\n            value = value + query_pos\n\n        return super(GroupFree3DMHA, self).forward(\n            query=query,\n            key=key,\n            value=value,\n            identity=identity,\n            query_pos=query_pos,\n            key_pos=key_pos,\n            attn_mask=attn_mask,\n            key_padding_mask=key_padding_mask,\n            **kwargs)\n\n\n@POSITIONAL_ENCODING.register_module()\nclass ConvBNPositionalEncoding(nn.Module):\n    \"\"\"Absolute position embedding with Conv learning.\n\n    Args:\n        input_channel (int): input features dim.\n        num_pos_feats (int, optional): output position features dim.\n            Defaults to 288 to be consistent with seed features dim.\n    \"\"\"\n\n    def __init__(self, input_channel, num_pos_feats=288):\n        super().__init__()\n        self.position_embedding_head = nn.Sequential(\n            nn.Conv1d(input_channel, num_pos_feats, kernel_size=1),\n            nn.BatchNorm1d(num_pos_feats), nn.ReLU(inplace=True),\n            nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1))\n\n    def forward(self, xyz):\n        \"\"\"Forward pass.\n\n        Args:\n            xyz (Tensor): (B, N, 3) the coordinates to embed.\n\n        Returns:\n            Tensor: (B, num_pos_feats, N) the embedded position features.\n        \"\"\"\n        xyz = xyz.permute(0, 2, 1)\n        position_embedding = self.position_embedding_head(xyz)\n        return position_embedding\n"
  },
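A minimal usage sketch (not part of the repository) for the ConvBNPositionalEncoding module above. It only checks the documented (B, N, 3) -> (B, num_pos_feats, N) shape contract; the import path is an assumption based on the neighbouring file paths.

import torch
from mmdet3d.models.model_utils.transformer import ConvBNPositionalEncoding  # assumed module path

pos_enc = ConvBNPositionalEncoding(input_channel=3, num_pos_feats=288)
xyz = torch.rand(2, 1024, 3)        # (B, N, 3) coordinates to embed
pos_feats = pos_enc(xyz)            # (B, 288, N) embedded position features
assert pos_feats.shape == (2, 288, 1024)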
  {
    "path": "mmdet3d/models/model_utils/vote_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv import is_tuple_of\nfrom mmcv.cnn import ConvModule\nfrom torch import nn as nn\n\nfrom mmdet3d.models.builder import build_loss\n\n\nclass VoteModule(nn.Module):\n    \"\"\"Vote module.\n\n    Generate votes from seed point features.\n\n    Args:\n        in_channels (int): Number of channels of seed point features.\n        vote_per_seed (int, optional): Number of votes generated from\n            each seed point. Default: 1.\n        gt_per_seed (int, optional): Number of ground truth votes generated\n            from each seed point. Default: 3.\n        num_points (int, optional): Number of points to be used for voting.\n            Default: 1.\n        conv_channels (tuple[int], optional): Out channels of vote\n            generating convolution. Default: (16, 16).\n        conv_cfg (dict, optional): Config of convolution.\n            Default: dict(type='Conv1d').\n        norm_cfg (dict, optional): Config of normalization.\n            Default: dict(type='BN1d').\n        norm_feats (bool, optional): Whether to normalize features.\n            Default: True.\n        with_res_feat (bool, optional): Whether to predict residual features.\n            Default: True.\n        vote_xyz_range (list[float], optional):\n            The range of points translation. Default: None.\n        vote_loss (dict, optional): Config of vote loss. Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 vote_per_seed=1,\n                 gt_per_seed=3,\n                 num_points=-1,\n                 conv_channels=(16, 16),\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 norm_feats=True,\n                 with_res_feat=True,\n                 vote_xyz_range=None,\n                 vote_loss=None):\n        super().__init__()\n        self.in_channels = in_channels\n        self.vote_per_seed = vote_per_seed\n        self.gt_per_seed = gt_per_seed\n        self.num_points = num_points\n        self.norm_feats = norm_feats\n        self.with_res_feat = with_res_feat\n\n        assert vote_xyz_range is None or is_tuple_of(vote_xyz_range, float)\n        self.vote_xyz_range = vote_xyz_range\n\n        if vote_loss is not None:\n            self.vote_loss = build_loss(vote_loss)\n\n        prev_channels = in_channels\n        vote_conv_list = list()\n        for k in range(len(conv_channels)):\n            vote_conv_list.append(\n                ConvModule(\n                    prev_channels,\n                    conv_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    bias=True,\n                    inplace=True))\n            prev_channels = conv_channels[k]\n        self.vote_conv = nn.Sequential(*vote_conv_list)\n\n        # conv_out predicts coordinate and residual features\n        if with_res_feat:\n            out_channel = (3 + in_channels) * self.vote_per_seed\n        else:\n            out_channel = 3 * self.vote_per_seed\n        self.conv_out = nn.Conv1d(prev_channels, out_channel, 1)\n\n    def forward(self, seed_points, seed_feats):\n        \"\"\"forward.\n\n        Args:\n            seed_points (torch.Tensor): Coordinate of the seed\n                points in shape (B, N, 3).\n            
seed_feats (torch.Tensor): Features of the seed points in shape\n                (B, C, N).\n\n        Returns:\n            tuple[torch.Tensor]:\n\n                - vote_points: Voted xyz based on the seed points\n                    with shape (B, M, 3), ``M=num_seed*vote_per_seed``.\n                - vote_features: Voted features based on the seed points with\n                    shape (B, C, M) where ``M=num_seed*vote_per_seed``,\n                    ``C=vote_feature_dim``.\n        \"\"\"\n        if self.num_points != -1:\n            assert self.num_points < seed_points.shape[1], \\\n                f'Number of vote points ({self.num_points}) should be '\\\n                f'smaller than seed points size ({seed_points.shape[1]})'\n            seed_points = seed_points[:, :self.num_points]\n            seed_feats = seed_feats[..., :self.num_points]\n\n        batch_size, feat_channels, num_seed = seed_feats.shape\n        num_vote = num_seed * self.vote_per_seed\n        x = self.vote_conv(seed_feats)\n        # (batch_size, (3+out_dim)*vote_per_seed, num_seed)\n        votes = self.conv_out(x)\n\n        votes = votes.transpose(2, 1).view(batch_size, num_seed,\n                                           self.vote_per_seed, -1)\n\n        offset = votes[:, :, :, 0:3]\n        if self.vote_xyz_range is not None:\n            limited_offset_list = []\n            for axis in range(len(self.vote_xyz_range)):\n                limited_offset_list.append(offset[..., axis].clamp(\n                    min=-self.vote_xyz_range[axis],\n                    max=self.vote_xyz_range[axis]))\n            limited_offset = torch.stack(limited_offset_list, -1)\n            vote_points = (seed_points.unsqueeze(2) +\n                           limited_offset).contiguous()\n        else:\n            vote_points = (seed_points.unsqueeze(2) + offset).contiguous()\n        vote_points = vote_points.view(batch_size, num_vote, 3)\n        offset = offset.reshape(batch_size, num_vote, 3).transpose(2, 1)\n\n        if self.with_res_feat:\n            res_feats = votes[:, :, :, 3:]\n            vote_feats = (seed_feats.transpose(2, 1).unsqueeze(2) +\n                          res_feats).contiguous()\n            vote_feats = vote_feats.view(batch_size,\n                                         num_vote, feat_channels).transpose(\n                                             2, 1).contiguous()\n\n            if self.norm_feats:\n                features_norm = torch.norm(vote_feats, p=2, dim=1)\n                vote_feats = vote_feats.div(features_norm.unsqueeze(1))\n        else:\n            vote_feats = seed_feats\n        return vote_points, vote_feats, offset\n\n    def get_loss(self, seed_points, vote_points, seed_indices,\n                 vote_targets_mask, vote_targets):\n        \"\"\"Calculate loss of voting module.\n\n        Args:\n            seed_points (torch.Tensor): Coordinate of the seed points.\n            vote_points (torch.Tensor): Coordinate of the vote points.\n            seed_indices (torch.Tensor): Indices of seed points in raw points.\n            vote_targets_mask (torch.Tensor): Mask of valid vote targets.\n            vote_targets (torch.Tensor): Targets of votes.\n\n        Returns:\n            torch.Tensor: Weighted vote loss.\n        \"\"\"\n        batch_size, num_seed = seed_points.shape[:2]\n\n        seed_gt_votes_mask = torch.gather(vote_targets_mask, 1,\n                                          seed_indices).float()\n\n        seed_indices_expand = 
seed_indices.unsqueeze(-1).repeat(\n            1, 1, 3 * self.gt_per_seed)\n        seed_gt_votes = torch.gather(vote_targets, 1, seed_indices_expand)\n        seed_gt_votes += seed_points.repeat(1, 1, self.gt_per_seed)\n\n        weight = seed_gt_votes_mask / (torch.sum(seed_gt_votes_mask) + 1e-6)\n        distance = self.vote_loss(\n            vote_points.view(batch_size * num_seed, -1, 3),\n            seed_gt_votes.view(batch_size * num_seed, -1, 3),\n            dst_weight=weight.view(batch_size * num_seed, 1))[1]\n        vote_loss = torch.sum(torch.min(distance, dim=1)[0])\n\n        return vote_loss\n"
  },
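A minimal usage sketch (not part of the repository) for VoteModule, assuming mmdet3d/mmcv are installed. It exercises only the forward path; vote_loss is left unset, so get_loss is not touched, and the channel numbers are illustrative.

import torch
from mmdet3d.models.model_utils.vote_module import VoteModule

vote_module = VoteModule(in_channels=256, vote_per_seed=1, conv_channels=(256, 256))
seed_points = torch.rand(2, 1024, 3)    # (B, N, 3) seed coordinates
seed_feats = torch.rand(2, 256, 1024)   # (B, C, N) seed features
vote_points, vote_feats, offset = vote_module(seed_points, seed_feats)
# vote_points: (B, N * vote_per_seed, 3); vote_feats: (B, C, N * vote_per_seed)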
  {
    "path": "mmdet3d/models/necks/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.necks.fpn import FPN\nfrom .dla_neck import DLANeck\nfrom .fpn import CustomFPN\nfrom .imvoxel_neck import OutdoorImVoxelNeck\nfrom .lss_fpn import FPN_LSS\nfrom .pointnet2_fp_neck import PointNetFPNeck\nfrom .second_fpn import SECONDFPN\nfrom .view_transformer import LSSViewTransformer, LSSViewTransformerBEVDepth\n\n__all__ = [\n    'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck',\n    'LSSViewTransformer', 'CustomFPN', 'FPN_LSS', 'LSSViewTransformerBEVDepth'\n]\n"
  },
  {
    "path": "mmdet3d/models/necks/dla_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport numpy as np\nfrom mmcv.cnn import ConvModule, build_conv_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom ..builder import NECKS\n\n\ndef fill_up_weights(up):\n    \"\"\"Simulated bilinear upsampling kernel.\n\n    Args:\n        up (nn.Module): ConvTranspose2d module.\n    \"\"\"\n    w = up.weight.data\n    f = math.ceil(w.size(2) / 2)\n    c = (2 * f - 1 - f % 2) / (2. * f)\n    for i in range(w.size(2)):\n        for j in range(w.size(3)):\n            w[0, 0, i, j] = \\\n                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))\n    for c in range(1, w.size(0)):\n        w[c, 0, :, :] = w[0, 0, :, :]\n\n\nclass IDAUpsample(BaseModule):\n    \"\"\"Iterative Deep Aggregation (IDA) Upsampling module to upsample features\n    of different scales to a similar scale.\n\n    Args:\n        out_channels (int): Number of output channels for DeformConv.\n        in_channels (List[int]): List of input channels of multi-scale\n            feature maps.\n        kernel_sizes (List[int]): List of size of the convolving\n            kernel of different scales.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: None.\n        use_dcn (bool, optional): If True, use DCNv2. Default: True.\n    \"\"\"\n\n    def __init__(\n        self,\n        out_channels,\n        in_channels,\n        kernel_sizes,\n        norm_cfg=None,\n        use_dcn=True,\n        init_cfg=None,\n    ):\n        super(IDAUpsample, self).__init__(init_cfg)\n        self.use_dcn = use_dcn\n        self.projs = nn.ModuleList()\n        self.ups = nn.ModuleList()\n        self.nodes = nn.ModuleList()\n\n        for i in range(1, len(in_channels)):\n            in_channel = in_channels[i]\n            up_kernel_size = int(kernel_sizes[i])\n            proj = ConvModule(\n                in_channel,\n                out_channels,\n                3,\n                padding=1,\n                bias=True,\n                conv_cfg=dict(type='DCNv2') if self.use_dcn else None,\n                norm_cfg=norm_cfg)\n            node = ConvModule(\n                out_channels,\n                out_channels,\n                3,\n                padding=1,\n                bias=True,\n                conv_cfg=dict(type='DCNv2') if self.use_dcn else None,\n                norm_cfg=norm_cfg)\n            up = build_conv_layer(\n                dict(type='deconv'),\n                out_channels,\n                out_channels,\n                up_kernel_size * 2,\n                stride=up_kernel_size,\n                padding=up_kernel_size // 2,\n                output_padding=0,\n                groups=out_channels,\n                bias=False)\n\n            self.projs.append(proj)\n            self.ups.append(up)\n            self.nodes.append(node)\n\n    def forward(self, mlvl_features, start_level, end_level):\n        \"\"\"Forward function.\n\n        Args:\n            mlvl_features (list[torch.Tensor]): Features from multiple layers.\n            start_level (int): Start layer for feature upsampling.\n            end_level (int): End layer for feature upsampling.\n        \"\"\"\n        for i in range(start_level, end_level - 1):\n            upsample = self.ups[i - start_level]\n            project = self.projs[i - start_level]\n            mlvl_features[i + 1] = upsample(project(mlvl_features[i + 1]))\n            node = self.nodes[i - start_level]\n         
   mlvl_features[i + 1] = node(mlvl_features[i + 1] +\n                                        mlvl_features[i])\n\n\nclass DLAUpsample(BaseModule):\n    \"\"\"Deep Layer Aggregation (DLA) Upsampling module for different scales\n    feature extraction, upsampling and fusion, It consists of groups of\n    IDAupsample modules.\n\n    Args:\n        start_level (int): The start layer.\n        channels (List[int]): List of input channels of multi-scale\n            feature maps.\n        scales(List[int]): List of scale of different layers' feature.\n        in_channels (NoneType, optional): List of input channels of\n            different scales. Default: None.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: None.\n        use_dcn (bool, optional): Whether to use dcn in IDAup module.\n            Default: True.\n    \"\"\"\n\n    def __init__(self,\n                 start_level,\n                 channels,\n                 scales,\n                 in_channels=None,\n                 norm_cfg=None,\n                 use_dcn=True,\n                 init_cfg=None):\n        super(DLAUpsample, self).__init__(init_cfg)\n        self.start_level = start_level\n        if in_channels is None:\n            in_channels = channels\n        self.channels = channels\n        channels = list(channels)\n        scales = np.array(scales, dtype=int)\n        for i in range(len(channels) - 1):\n            j = -i - 2\n            setattr(\n                self, 'ida_{}'.format(i),\n                IDAUpsample(channels[j], in_channels[j:],\n                            scales[j:] // scales[j], norm_cfg, use_dcn))\n            scales[j + 1:] = scales[j]\n            in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]\n\n    def forward(self, mlvl_features):\n        \"\"\"Forward function.\n\n        Args:\n            mlvl_features(list[torch.Tensor]): Features from multi-scale\n                layers.\n\n        Returns:\n            tuple[torch.Tensor]: Up-sampled features of different layers.\n        \"\"\"\n        outs = [mlvl_features[-1]]\n        for i in range(len(mlvl_features) - self.start_level - 1):\n            ida = getattr(self, 'ida_{}'.format(i))\n            ida(mlvl_features, len(mlvl_features) - i - 2, len(mlvl_features))\n            outs.insert(0, mlvl_features[-1])\n        return outs\n\n\n@NECKS.register_module()\nclass DLANeck(BaseModule):\n    \"\"\"DLA Neck.\n\n    Args:\n        in_channels (list[int], optional): List of input channels\n            of multi-scale feature map.\n        start_level (int, optional): The scale level where upsampling\n            starts. Default: 2.\n        end_level (int, optional): The scale level where upsampling\n            ends. Default: 5.\n        norm_cfg (dict, optional): Config dict for normalization\n            layer. 
Default: None.\n        use_dcn (bool, optional): Whether to use dcn in IDAup module.\n            Default: True.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[16, 32, 64, 128, 256, 512],\n                 start_level=2,\n                 end_level=5,\n                 norm_cfg=None,\n                 use_dcn=True,\n                 init_cfg=None):\n        super(DLANeck, self).__init__(init_cfg)\n        self.start_level = start_level\n        self.end_level = end_level\n        scales = [2**i for i in range(len(in_channels[self.start_level:]))]\n        self.dla_up = DLAUpsample(\n            start_level=self.start_level,\n            channels=in_channels[self.start_level:],\n            scales=scales,\n            norm_cfg=norm_cfg,\n            use_dcn=use_dcn)\n        self.ida_up = IDAUpsample(\n            in_channels[self.start_level],\n            in_channels[self.start_level:self.end_level],\n            [2**i for i in range(self.end_level - self.start_level)], norm_cfg,\n            use_dcn)\n\n    def forward(self, x):\n        mlvl_features = [x[i] for i in range(len(x))]\n        mlvl_features = self.dla_up(mlvl_features)\n        outs = []\n        for i in range(self.end_level - self.start_level):\n            outs.append(mlvl_features[i].clone())\n        self.ida_up(outs, 0, len(outs))\n        return [outs[-1]]\n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.ConvTranspose2d):\n                # In order to be consistent with the source code,\n                # reset the ConvTranspose2d initialization parameters\n                m.reset_parameters()\n                # Simulated bilinear upsampling kernel\n                fill_up_weights(m)\n            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Conv2d):\n                # In order to be consistent with the source code,\n                # reset the Conv2d initialization parameters\n                m.reset_parameters()\n"
  },
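A hedged usage sketch (not part of the repository) for DLANeck. With use_dcn=False it falls back to plain convolutions, so no compiled DCNv2 op is required; the input list mimics six DLA stages whose spatial size halves at every level, and the sizes below are illustrative assumptions.

import torch
from mmdet3d.models.necks.dla_neck import DLANeck

neck = DLANeck(in_channels=[16, 32, 64, 128, 256, 512], start_level=2,
               end_level=5, use_dcn=False)
feats = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
         for i, c in enumerate([16, 32, 64, 128, 256, 512])]
out = neck(feats)   # list with a single fused map, here (1, 64, 16, 16)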
  {
    "path": "mmdet3d/models/necks/fpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16\nimport torch.utils.checkpoint as cp\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass CustomFPN(BaseModule):\n    r\"\"\"Feature Pyramid Network.\n\n    This is an implementation of paper `Feature Pyramid Networks for Object\n    Detection <https://arxiv.org/abs/1612.03144>`_.\n\n    Args:\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale)\n        num_outs (int): Number of output scales.\n        start_level (int): Index of the start input backbone level used to\n            build the feature pyramid. Default: 0.\n        end_level (int): Index of the end input backbone level (exclusive) to\n            build the feature pyramid. Default: -1, which means the last level.\n        add_extra_convs (bool | str): If bool, it decides whether to add conv\n            layers on top of the original feature maps. Default to False.\n            If True, it is equivalent to `add_extra_convs='on_input'`.\n            If str, it specifies the source feature map of the extra convs.\n            Only the following options are allowed\n\n            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).\n            - 'on_lateral':  Last feature map after lateral convs.\n            - 'on_output': The last output feature map after fpn convs.\n        relu_before_extra_convs (bool): Whether to apply relu before the extra\n            conv. Default: False.\n        no_norm_on_lateral (bool): Whether to apply norm on lateral.\n            Default: False.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        act_cfg (str): Config dict for activation layer in ConvModule.\n            Default: None.\n        upsample_cfg (dict): Config dict for interpolate layer.\n            Default: `dict(mode='nearest')`\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n\n    Example:\n        >>> import torch\n        >>> in_channels = [2, 3, 5, 7]\n        >>> scales = [340, 170, 84, 43]\n        >>> inputs = [torch.rand(1, c, s, s)\n        ...           for c, s in zip(in_channels, scales)]\n        >>> self = FPN(in_channels, 11, len(in_channels)).eval()\n        >>> outputs = self.forward(inputs)\n        >>> for i in range(len(outputs)):\n        ...     
print(f'outputs[{i}].shape = {outputs[i].shape}')\n        outputs[0].shape = torch.Size([1, 11, 340, 340])\n        outputs[1].shape = torch.Size([1, 11, 170, 170])\n        outputs[2].shape = torch.Size([1, 11, 84, 84])\n        outputs[3].shape = torch.Size([1, 11, 43, 43])\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 start_level=0,\n                 end_level=-1,\n                 out_ids=[],\n                 add_extra_convs=False,\n                 relu_before_extra_convs=False,\n                 no_norm_on_lateral=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 with_cp=False,\n                 act_cfg=None,\n                 upsample_cfg=dict(mode='nearest'),\n                 init_cfg=dict(\n                     type='Xavier', layer='Conv2d', distribution='uniform')):\n        super(CustomFPN, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        self.relu_before_extra_convs = relu_before_extra_convs\n        self.no_norm_on_lateral = no_norm_on_lateral\n        self.fp16_enabled = False\n        self.with_cp = with_cp\n        self.upsample_cfg = upsample_cfg.copy()\n        self.out_ids = out_ids\n        if end_level == -1:\n            self.backbone_end_level = self.num_ins\n            # assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level < inputs, no extra level is allowed\n            self.backbone_end_level = end_level\n            assert end_level <= len(in_channels)\n            assert num_outs == end_level - start_level\n        self.start_level = start_level\n        self.end_level = end_level\n        self.add_extra_convs = add_extra_convs\n        assert isinstance(add_extra_convs, (str, bool))\n        if isinstance(add_extra_convs, str):\n            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'\n            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')\n        elif add_extra_convs:  # True\n            self.add_extra_convs = 'on_input'\n\n        self.lateral_convs = nn.ModuleList()\n        self.fpn_convs = nn.ModuleList()\n\n        for i in range(self.start_level, self.backbone_end_level):\n            l_conv = ConvModule(\n                in_channels[i],\n                out_channels,\n                1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,\n                act_cfg=act_cfg,\n                inplace=False)\n\n            self.lateral_convs.append(l_conv)\n            if i in self.out_ids:\n                fpn_conv = ConvModule(\n                    out_channels,\n                    out_channels,\n                    3,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    inplace=False)\n                self.fpn_convs.append(fpn_conv)\n\n        # add extra conv layers (e.g., RetinaNet)\n        extra_levels = num_outs - self.backbone_end_level + self.start_level\n        if self.add_extra_convs and extra_levels >= 1:\n            for i in range(extra_levels):\n                if i == 0 and self.add_extra_convs == 'on_input':\n                    in_channels = 
self.in_channels[self.backbone_end_level - 1]\n                else:\n                    in_channels = out_channels\n                extra_fpn_conv = ConvModule(\n                    in_channels,\n                    out_channels,\n                    3,\n                    stride=2,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    inplace=False)\n                self.fpn_convs.append(extra_fpn_conv)\n\n    @auto_fp16()\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == len(self.in_channels)\n\n        # build laterals\n        laterals = [\n            lateral_conv(inputs[i + self.start_level])\n            for i, lateral_conv in enumerate(self.lateral_convs)\n        ]\n\n        # build top-down path\n        used_backbone_levels = len(laterals)\n        for i in range(used_backbone_levels - 1, 0, -1):\n            # In some cases, fixing `scale factor` (e.g. 2) is preferred, but\n            #  it cannot co-exist with `size` in `F.interpolate`.\n            if 'scale_factor' in self.upsample_cfg:\n                laterals[i - 1] += F.interpolate(laterals[i],\n                                                 **self.upsample_cfg)\n            else:\n                prev_shape = laterals[i - 1].shape[2:]\n                laterals[i - 1] += F.interpolate(\n                    laterals[i], size=prev_shape, **self.upsample_cfg)\n\n        # build outputs\n        # part 1: from original levels\n        outs = [self.fpn_convs[i](laterals[i]) for i in self.out_ids]\n        # part 2: add extra levels\n        if self.num_outs > len(outs):\n            # use max pool to get more levels on top of outputs\n            # (e.g., Faster R-CNN, Mask R-CNN)\n            if not self.add_extra_convs:\n                for i in range(self.num_outs - used_backbone_levels):\n                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))\n            # add conv layers on top of original feature maps (RetinaNet)\n            else:\n                if self.add_extra_convs == 'on_input':\n                    extra_source = inputs[self.backbone_end_level - 1]\n                elif self.add_extra_convs == 'on_lateral':\n                    extra_source = laterals[-1]\n                elif self.add_extra_convs == 'on_output':\n                    extra_source = outs[-1]\n                else:\n                    raise NotImplementedError\n                outs.append(self.fpn_convs[used_backbone_levels](extra_source))\n                for i in range(used_backbone_levels + 1, self.num_outs):\n                    if self.relu_before_extra_convs:\n                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))\n                    else:\n                        outs.append(self.fpn_convs[i](outs[-1]))\n        return outs[0]\n"
  },
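A usage sketch (not part of the repository) for CustomFPN. Unlike the stock FPN shown in the docstring example, CustomFPN builds fpn_convs only for indices in out_ids and its forward returns a single tensor; out_ids=[0] matches how forward indexes fpn_convs. The channel numbers are assumptions for illustration.

import torch
from mmdet3d.models.necks.fpn import CustomFPN

neck = CustomFPN(in_channels=[1024, 2048], out_channels=256, num_outs=1,
                 start_level=0, out_ids=[0])
feats = [torch.rand(1, 1024, 32, 32), torch.rand(1, 2048, 16, 16)]
out = neck(feats)   # single tensor (1, 256, 32, 32), not a tuple of levels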
  {
    "path": "mmdet3d/models/necks/imvoxel_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule\nfrom torch import nn\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass OutdoorImVoxelNeck(nn.Module):\n    \"\"\"Neck for ImVoxelNet outdoor scenario.\n\n    Args:\n        in_channels (int): Input channels of multi-scale feature map.\n        out_channels (int): Output channels of multi-scale feature map.\n    \"\"\"\n\n    def __init__(self, in_channels, out_channels):\n        super().__init__()\n        self.model = nn.Sequential(\n            ResModule(in_channels),\n            ConvModule(\n                in_channels=in_channels,\n                out_channels=in_channels * 2,\n                kernel_size=3,\n                stride=(1, 1, 2),\n                padding=1,\n                conv_cfg=dict(type='Conv3d'),\n                norm_cfg=dict(type='BN3d'),\n                act_cfg=dict(type='ReLU', inplace=True)),\n            ResModule(in_channels * 2),\n            ConvModule(\n                in_channels=in_channels * 2,\n                out_channels=in_channels * 4,\n                kernel_size=3,\n                stride=(1, 1, 2),\n                padding=1,\n                conv_cfg=dict(type='Conv3d'),\n                norm_cfg=dict(type='BN3d'),\n                act_cfg=dict(type='ReLU', inplace=True)),\n            ResModule(in_channels * 4),\n            ConvModule(\n                in_channels=in_channels * 4,\n                out_channels=out_channels,\n                kernel_size=3,\n                padding=(1, 1, 0),\n                conv_cfg=dict(type='Conv3d'),\n                norm_cfg=dict(type='BN3d'),\n                act_cfg=dict(type='ReLU', inplace=True)))\n\n    def forward(self, x):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): of shape (N, C_in, N_x, N_y, N_z).\n\n        Returns:\n            list[torch.Tensor]: of shape (N, C_out, N_y, N_x).\n        \"\"\"\n        x = self.model.forward(x)\n        assert x.shape[-1] == 1\n        # Anchor3DHead axis order is (y, x).\n        return [x[..., 0].transpose(-1, -2)]\n\n    def init_weights(self):\n        \"\"\"Initialize weights of neck.\"\"\"\n        pass\n\n\nclass ResModule(nn.Module):\n    \"\"\"3d residual block for ImVoxelNeck.\n\n    Args:\n        n_channels (int): Input channels of a feature map.\n    \"\"\"\n\n    def __init__(self, n_channels):\n        super().__init__()\n        self.conv0 = ConvModule(\n            in_channels=n_channels,\n            out_channels=n_channels,\n            kernel_size=3,\n            padding=1,\n            conv_cfg=dict(type='Conv3d'),\n            norm_cfg=dict(type='BN3d'),\n            act_cfg=dict(type='ReLU', inplace=True))\n        self.conv1 = ConvModule(\n            in_channels=n_channels,\n            out_channels=n_channels,\n            kernel_size=3,\n            padding=1,\n            conv_cfg=dict(type='Conv3d'),\n            norm_cfg=dict(type='BN3d'),\n            act_cfg=None)\n        self.activation = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): of shape (N, C, N_x, N_y, N_z).\n\n        Returns:\n            torch.Tensor: 5d feature map.\n        \"\"\"\n        identity = x\n        x = self.conv0(x)\n        x = self.conv1(x)\n        x = identity + x\n        x = self.activation(x)\n        return x\n"
  },
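A shape-check sketch (not part of the repository) for OutdoorImVoxelNeck. The two stride-(1, 1, 2) blocks plus the final convolution with zero padding along the last axis must collapse N_z to exactly 1 for the assert in forward to pass; N_z = 12 is one size that satisfies it.

import torch
from mmdet3d.models.necks.imvoxel_neck import OutdoorImVoxelNeck

neck = OutdoorImVoxelNeck(in_channels=8, out_channels=32)
x = torch.rand(1, 8, 20, 24, 12)    # (N, C_in, N_x, N_y, N_z)
out = neck(x)                       # [(N, C_out, N_y, N_x)] == [(1, 32, 24, 20)]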
  {
    "path": "mmdet3d/models/necks/lss_fpn.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import build_norm_layer\nimport torch.nn.functional as F\nfrom mmdet.models import NECKS\nimport torch.utils.checkpoint as cp\n\n@NECKS.register_module()\nclass FPN_LSS(nn.Module):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 scale_factor=4,\n                 input_feature_index=(0, 2),\n                 norm_cfg=dict(type='BN'),\n                 extra_upsample=2,\n                 lateral=None,\n                 with_cp=False,\n                 use_input_conv=False):\n        super().__init__()\n        self.input_feature_index = input_feature_index\n        self.extra_upsample = extra_upsample is not None\n        self.with_cp = with_cp\n        # self.up = nn.Upsample(\n        #     scale_factor=scale_factor, mode='bilinear', align_corners=True)\n        # assert norm_cfg['type'] in ['BN', 'SyncBN']\n        channels_factor = 2 if self.extra_upsample else 1\n        self.input_conv = nn.Sequential(\n            nn.Conv2d(\n                in_channels,\n                out_channels * channels_factor,\n                kernel_size=1,\n                padding=0,\n                bias=False),\n            build_norm_layer(\n                norm_cfg, out_channels * channels_factor, postfix=0)[1],\n            nn.ReLU(inplace=True),\n        ) if use_input_conv else None\n        if use_input_conv:\n            in_channels = out_channels * channels_factor\n        self.conv = nn.Sequential(\n            nn.Conv2d(\n                in_channels,\n                out_channels * channels_factor,\n                kernel_size=3,\n                padding=1,\n                bias=False),\n            build_norm_layer(\n                norm_cfg, out_channels * channels_factor, postfix=0)[1],\n            nn.ReLU(inplace=True),\n            nn.Conv2d(\n                out_channels * channels_factor,\n                out_channels * channels_factor,\n                kernel_size=3,\n                padding=1,\n                bias=False),\n            build_norm_layer(\n                norm_cfg, out_channels * channels_factor, postfix=0)[1],\n            nn.ReLU(inplace=True),\n        )\n        if self.extra_upsample:\n            self.up2 = nn.Sequential(\n                nn.Upsample(\n                    scale_factor=extra_upsample,\n                    mode='bilinear',\n                    align_corners=True),\n                nn.Conv2d(\n                    out_channels * channels_factor,\n                    out_channels,\n                    kernel_size=3,\n                    padding=1,\n                    bias=False),\n                build_norm_layer(norm_cfg, out_channels, postfix=0)[1],\n                nn.ReLU(inplace=True),\n                nn.Conv2d(\n                    out_channels, out_channels, kernel_size=1, padding=0),\n            )\n        self.lateral = lateral is not None\n        if self.lateral:\n            self.lateral_conv = nn.Sequential(\n                nn.Conv2d(\n                    lateral, lateral, kernel_size=1, padding=0, bias=False),\n                build_norm_layer(norm_cfg, lateral, postfix=0)[1],\n                nn.ReLU(inplace=True),\n            )\n\n    def forward(self, feats):\n        x2, x1 = feats[self.input_feature_index[0]], \\\n                 feats[self.input_feature_index[1]]\n        if self.with_cp:\n            if self.lateral:\n                x2 = 
cp.checkpoint(self.lateral_conv, x2)\n            x1 = F.interpolate(x1, size=x2.shape[2:])\n            x = torch.cat([x2, x1], dim=1)\n            if self.input_conv is not None:\n                x = cp.checkpoint(self.input_conv, x)\n            x = cp.checkpoint(self.conv, x)\n            if self.extra_upsample:\n                x = cp.checkpoint(self.up2, x)\n        else:\n            if self.lateral:\n                x2 = self.lateral_conv(x2)\n            x1 = F.interpolate(x1, size=x2.shape[2:])\n            x = torch.cat([x2, x1], dim=1)\n            if self.input_conv is not None:\n                x = self.input_conv(x)\n            x = self.conv(x)\n            if self.extra_upsample:\n                x = self.up2(x)\n        return x\n"
  },
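A usage sketch (not part of the repository) for FPN_LSS. With the default input_feature_index=(0, 2) it fuses feats[0] and feats[2], so in_channels must equal the sum of those two channel counts; the feature sizes below are illustrative assumptions.

import torch
from mmdet3d.models.necks.lss_fpn import FPN_LSS

neck = FPN_LSS(in_channels=64 + 256, out_channels=128)
feats = [torch.rand(1, 64, 64, 64),
         torch.rand(1, 128, 32, 32),   # unused by the default input_feature_index=(0, 2)
         torch.rand(1, 256, 16, 16)]
out = neck(feats)                      # (1, 128, 128, 128) after the extra 2x upsample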
  {
    "path": "mmdet3d/models/necks/pointnet2_fp_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import PointFPModule\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass PointNetFPNeck(BaseModule):\n    r\"\"\"PointNet FP Module used in PointRCNN.\n\n    Refer to the `official code <https://github.com/charlesq34/pointnet2>`_.\n\n    .. code-block:: none\n\n        sa_n ----------------------------------------\n                                                     |\n        ... ---------------------------------        |\n                                             |       |\n        sa_1 -------------                   |       |\n                          |                  |       |\n        sa_0 -> fp_0 -> fp_module ->fp_1 -> ... -> fp_module -> fp_n\n\n    sa_n including sa_xyz (torch.Tensor) and sa_features (torch.Tensor)\n    fp_n including fp_xyz (torch.Tensor) and fp_features (torch.Tensor)\n\n    Args:\n        fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self, fp_channels, init_cfg=None):\n        super(PointNetFPNeck, self).__init__(init_cfg=init_cfg)\n\n        self.num_fp = len(fp_channels)\n        self.FP_modules = nn.ModuleList()\n        for cur_fp_mlps in fp_channels:\n            self.FP_modules.append(PointFPModule(mlp_channels=cur_fp_mlps))\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone, which may contain\n                the following keys and values:\n\n                - sa_xyz (list[torch.Tensor]): Points of each sa module\n                    in shape (N, 3).\n                - sa_features (list[torch.Tensor]): Output features of\n                    each sa module in shape (N, M).\n\n        Returns:\n            list[torch.Tensor]: Coordinates of multiple levels of points.\n            list[torch.Tensor]: Features of multiple levels of points.\n        \"\"\"\n        sa_xyz = feat_dict['sa_xyz']\n        sa_features = feat_dict['sa_features']\n        assert len(sa_xyz) == len(sa_features)\n\n        return sa_xyz, sa_features\n\n    def forward(self, feat_dict):\n        \"\"\"Forward pass.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            dict[str, torch.Tensor]: Outputs of the Neck.\n\n                - fp_xyz (torch.Tensor): The coordinates of fp features.\n                - fp_features (torch.Tensor): The features from the last\n                    feature propagation layers.\n        \"\"\"\n        sa_xyz, sa_features = self._extract_input(feat_dict)\n\n        fp_feature = sa_features[-1]\n        fp_xyz = sa_xyz[-1]\n\n        for i in range(self.num_fp):\n            # consume the points in a bottom-up manner\n            fp_feature = self.FP_modules[i](sa_xyz[-(i + 2)], sa_xyz[-(i + 1)],\n                                            sa_features[-(i + 2)], fp_feature)\n            fp_xyz = sa_xyz[-(i + 2)]\n\n        ret = dict(fp_xyz=fp_xyz, fp_features=fp_feature)\n        return ret\n"
  },
  {
    "path": "mmdet3d/models/necks/second_fpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.cnn import build_conv_layer, build_norm_layer, build_upsample_layer\nfrom mmcv.runner import BaseModule, auto_fp16\nfrom torch import nn as nn\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass SECONDFPN(BaseModule):\n    \"\"\"FPN used in SECOND/PointPillars/PartA2/MVXNet.\n\n    Args:\n        in_channels (list[int]): Input channels of multi-scale feature maps.\n        out_channels (list[int]): Output channels of feature maps.\n        upsample_strides (list[int]): Strides used to upsample the\n            feature maps.\n        norm_cfg (dict): Config dict of normalization layers.\n        upsample_cfg (dict): Config dict of upsample layers.\n        conv_cfg (dict): Config dict of conv layers.\n        use_conv_for_no_stride (bool): Whether to use conv when stride is 1.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[128, 128, 256],\n                 out_channels=[256, 256, 256],\n                 upsample_strides=[1, 2, 4],\n                 norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n                 upsample_cfg=dict(type='deconv', bias=False),\n                 conv_cfg=dict(type='Conv2d', bias=False),\n                 use_conv_for_no_stride=False,\n                 init_cfg=None):\n        # if for GroupNorm,\n        # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True)\n        super(SECONDFPN, self).__init__(init_cfg=init_cfg)\n        assert len(out_channels) == len(upsample_strides) == len(in_channels)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.fp16_enabled = False\n\n        deblocks = []\n        for i, out_channel in enumerate(out_channels):\n            stride = upsample_strides[i]\n            if stride > 1 or (stride == 1 and not use_conv_for_no_stride):\n                upsample_layer = build_upsample_layer(\n                    upsample_cfg,\n                    in_channels=in_channels[i],\n                    out_channels=out_channel,\n                    kernel_size=upsample_strides[i],\n                    stride=upsample_strides[i])\n            else:\n                stride = np.round(1 / stride).astype(np.int64)\n                upsample_layer = build_conv_layer(\n                    conv_cfg,\n                    in_channels=in_channels[i],\n                    out_channels=out_channel,\n                    kernel_size=stride,\n                    stride=stride)\n\n            deblock = nn.Sequential(upsample_layer,\n                                    build_norm_layer(norm_cfg, out_channel)[1],\n                                    nn.ReLU(inplace=True))\n            deblocks.append(deblock)\n        self.deblocks = nn.ModuleList(deblocks)\n\n        if init_cfg is None:\n            self.init_cfg = [\n                dict(type='Kaiming', layer='ConvTranspose2d'),\n                dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0)\n            ]\n\n    @auto_fp16()\n    def forward(self, x):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): 4D Tensor in (N, C, H, W) shape.\n\n        Returns:\n            list[torch.Tensor]: Multi-level feature maps.\n        \"\"\"\n        assert len(x) == len(self.in_channels)\n        ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)]\n\n        if len(ups) > 1:\n            out = torch.cat(ups, dim=1)\n        else:\n            out = ups[0]\n    
    return [out]\n"
  },
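A usage sketch (not part of the repository) for SECONDFPN with its default configuration: each level is upsampled to the resolution of the first one and the results are concatenated along channels.

import torch
from mmdet3d.models.necks.second_fpn import SECONDFPN

neck = SECONDFPN()                   # defaults: in [128, 128, 256] -> out [256, 256, 256], strides [1, 2, 4]
feats = [torch.rand(1, 128, 64, 64),
         torch.rand(1, 128, 32, 32),
         torch.rand(1, 256, 16, 16)]
out = neck(feats)                    # [(1, 768, 64, 64)] after channel-wise concatenation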
  {
    "path": "mmdet3d/models/necks/view_transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import build_conv_layer\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.cuda.amp.autocast_mode import autocast\nfrom torch.utils.checkpoint import checkpoint\n\nfrom mmdet3d.ops.bev_pool_v2.bev_pool import bev_pool_v2\nfrom mmdet.models.backbones.resnet import BasicBlock\nfrom ..builder import NECKS\nimport torch.utils.checkpoint as cp\n\n@NECKS.register_module()\nclass LSSViewTransformer(BaseModule):\n    r\"\"\"Lift-Splat-Shoot view transformer.\n\n    Please refer to the `paper <https://arxiv.org/abs/2008.05711>`_\n\n    Args:\n        grid_config (dict): Config of grid alone each axis in format of\n            (lower_bound, upper_bound, interval). axis in {x,y,z,depth}.\n        input_size (tuple(int)): Size of input images in format of (height,\n            width).\n        downsample (int): Down sample factor from the input size to the feature\n            size.\n        in_channels (int): Channels of input feature.\n        out_channels (int): Channels of transformed feature.\n        accelerate (bool): Whether the view transformation is conducted with\n            acceleration. Note: the intrinsic and extrinsic of cameras should\n            be constant when 'accelerate' is set true.\n    \"\"\"\n\n    def __init__(\n        self,\n        grid_config,\n        input_size,\n         downsample=16,\n        in_channels=512,\n        out_channels=64,\n        accelerate=False,\n        uniform=False,\n        with_cp=False\n    ):\n        super(LSSViewTransformer, self).__init__()\n        self.uniform = uniform\n        self.with_cp = with_cp\n        self.grid_config = grid_config\n        self.downsample = downsample\n        self.create_grid_infos(**grid_config)\n        self.create_frustum(grid_config['depth'], input_size, downsample)\n        self.out_channels = out_channels\n        self.in_channels = in_channels\n        self.depth_net = nn.Conv2d(\n            in_channels, self.D + self.out_channels, kernel_size=1, padding=0)\n        self.accelerate = accelerate\n        self.initial_flag = True\n\n    def create_grid_infos(self, x, y, z, **kwargs):\n        \"\"\"Generate the grid information including the lower bound, interval,\n        and size.\n\n        Args:\n            x (tuple(float)): Config of grid alone x axis in format of\n                (lower_bound, upper_bound, interval).\n            y (tuple(float)): Config of grid alone y axis in format of\n                (lower_bound, upper_bound, interval).\n            z (tuple(float)): Config of grid alone z axis in format of\n                (lower_bound, upper_bound, interval).\n            **kwargs: Container for other potential parameters\n        \"\"\"\n        self.grid_lower_bound = torch.Tensor([cfg[0] for cfg in [x, y, z]])\n        self.grid_interval = torch.Tensor([cfg[2] for cfg in [x, y, z]])\n        self.grid_size = torch.Tensor([(cfg[1] - cfg[0]) / cfg[2]\n                                       for cfg in [x, y, z]])\n\n    def create_frustum(self, depth_cfg, input_size, downsample):\n        \"\"\"Generate the frustum template for each image.\n\n        Args:\n            depth_cfg (tuple(float)): Config of grid alone depth axis in format\n                of (lower_bound, upper_bound, interval).\n            input_size (tuple(int)): Size of input images in format of (height,\n                width).\n            downsample (int): Down 
sample scale factor from the input size to\n                the feature size.\n        \"\"\"\n        H_in, W_in = input_size\n        H_feat, W_feat = H_in // downsample, W_in // downsample\n        d = torch.arange(*depth_cfg, dtype=torch.float)\\\n            .view(-1, 1, 1).expand(-1, H_feat, W_feat)\n        self.D = d.shape[0]\n        x = torch.linspace(0, W_in - 1, W_feat,  dtype=torch.float)\\\n            .view(1, 1, W_feat).expand(self.D, H_feat, W_feat)\n        y = torch.linspace(0, H_in - 1, H_feat,  dtype=torch.float)\\\n            .view(1, H_feat, 1).expand(self.D, H_feat, W_feat)\n\n        # D x H x W x 3\n        self.frustum = torch.stack((x, y, d), -1)\n\n    def get_lidar_coor(self, rots, trans, cam2imgs, post_rots, post_trans,\n                       bda):\n        \"\"\"Calculate the locations of the frustum points in the lidar\n        coordinate system.\n\n        Args:\n            rots (torch.Tensor): Rotation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3, 3).\n            trans (torch.Tensor): Translation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3).\n            cam2imgs (torch.Tensor): Camera intrinsic matrixes in shape\n                (B, N_cams, 3, 3).\n            post_rots (torch.Tensor): Rotation in camera coordinate system in\n                shape (B, N_cams, 3, 3). It is derived from the image view\n                augmentation.\n            post_trans (torch.Tensor): Translation in camera coordinate system\n                derived from image view augmentation in shape (B, N_cams, 3).\n\n        Returns:\n            torch.tensor: Point coordinates in shape\n                (B, N_cams, D, ownsample, 3)\n        \"\"\"\n\n        B, N, _ = trans.shape\n\n        # post-transformation\n        # B x N x D x H x W x 3\n\n        points = self.frustum.to(rots) - post_trans.view(B, N, 1, 1, 1, 3)\n        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\\\n            .matmul(points.unsqueeze(-1))\n\n        # cam_to_ego\n        points = torch.cat(\n            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)\n        combine = rots.matmul(torch.inverse(cam2imgs))\n        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)\n        points += trans.view(B, N, 1, 1, 1, 3)\n        points = bda.view(B, 1, 1, 1, 1, 3,\n                          3).matmul(points.unsqueeze(-1)).squeeze(-1)\n        return points\n\n    def init_acceleration_v2(self, coor):\n        \"\"\"Pre-compute the necessary information in acceleration including the\n        index of points in the final feature.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in lidar space in shape\n                (B, N_cams, D, H, W, 3).\n            x (torch.tensor): Feature of points in shape\n                (B, N_cams, D, H, W, C).\n        \"\"\"\n\n        ranks_bev, ranks_depth, ranks_feat, \\\n            interval_starts, interval_lengths = \\\n            self.voxel_pooling_prepare_v2(coor)\n\n        self.ranks_bev = ranks_bev.int().contiguous()\n        self.ranks_feat = ranks_feat.int().contiguous()\n        self.ranks_depth = ranks_depth.int().contiguous()\n        self.interval_starts = interval_starts.int().contiguous()\n        self.interval_lengths = interval_lengths.int().contiguous()\n\n    def voxel_pooling_v2(self, coor, depth, feat):\n        ranks_bev, ranks_depth, ranks_feat, \\\n            
interval_starts, interval_lengths = \\\n            self.voxel_pooling_prepare_v2(coor)\n        if ranks_feat is None:\n            print('warning ---> no points within the predefined '\n                  'bev receptive field')\n            dummy = torch.zeros(size=[\n                feat.shape[0], feat.shape[2],\n                int(self.grid_size[2]),\n                int(self.grid_size[0]),\n                int(self.grid_size[1])\n            ]).to(feat)\n            dummy = torch.cat(dummy.unbind(dim=2), 1)\n            return dummy\n        feat = feat.permute(0, 1, 3, 4, 2)\n        bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                          int(self.grid_size[1]), int(self.grid_size[0]),\n                          feat.shape[-1])  # (B, Z, Y, X, C)\n        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                               bev_feat_shape, interval_starts,\n                               interval_lengths)\n        # collapse Z\n        bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)\n        return bev_feat\n\n    def voxel_pooling_prepare_v2(self, coor):\n        \"\"\"Data preparation for voxel pooling.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in the lidar space in\n                shape (B, N, D, H, W, 3).\n\n        Returns:\n            tuple[torch.tensor]: Rank of the voxel that a point belongs to\n                in shape (N_Points); Reserved index of points in the depth\n                space in shape (N_Points). Reserved index of points in the\n                feature space in shape (N_Points).\n        \"\"\"\n        B, N, D, H, W, _ = coor.shape\n        num_points = B * N * D * H * W\n        # record the index of selected points for acceleration purpose\n        ranks_depth = torch.range(\n            0, num_points - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = torch.range(\n            0, num_points // D - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)\n        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()\n\n        # convert coordinate into the voxel space\n        coor = ((coor - self.grid_lower_bound.to(coor)) /\n                self.grid_interval.to(coor))\n        coor = coor.long().view(num_points, 3)\n        batch_idx = torch.range(0, B - 1).reshape(B, 1). 
\\\n            expand(B, num_points // B).reshape(num_points, 1).to(coor)\n        coor = torch.cat((coor, batch_idx), 1)\n\n        # filter out points that are outside box\n        kept = (coor[:, 0] >= 0) & (coor[:, 0] < self.grid_size[0]) & \\\n               (coor[:, 1] >= 0) & (coor[:, 1] < self.grid_size[1]) & \\\n               (coor[:, 2] >= 0) & (coor[:, 2] < self.grid_size[2])\n        if len(kept) == 0:\n            return None, None, None, None, None\n        coor, ranks_depth, ranks_feat = \\\n            coor[kept], ranks_depth[kept], ranks_feat[kept]\n        # get tensors from the same voxel next to each other\n        ranks_bev = coor[:, 3] * (\n            self.grid_size[2] * self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0]\n        \n        order = ranks_bev.argsort()\n        ranks_bev, ranks_depth, ranks_feat = \\\n            ranks_bev[order], ranks_depth[order], ranks_feat[order]\n\n        kept = torch.ones(\n            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)\n\n        kept[1:] = ranks_bev[1:] != ranks_bev[:-1]\n        interval_starts = torch.where(kept)[0].int()\n        if len(interval_starts) == 0:\n            return None, None, None, None, None\n        interval_lengths = torch.zeros_like(interval_starts)\n        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]\n        interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]\n\n        return  ranks_bev.int().contiguous(),  \\\n                ranks_depth.int().contiguous(),\\\n                ranks_feat.int().contiguous(),  \\\n                interval_starts.int().contiguous(),\\\n                interval_lengths.int().contiguous() \n\n    def pre_compute(self, input):\n        if self.initial_flag:\n            coor = self.get_lidar_coor(*input[1:7])\n            self.init_acceleration_v2( coor)\n            self.initial_flag = False\n\n    def view_transform_core(self, input, depth, tran_feat):\n        B, N, C, H, W = input[0].shape\n\n        # Lift-Splat\n        if self.accelerate:\n            feat = tran_feat.view(B, N, self.out_channels, H, W)\n            feat = feat.permute(0, 1, 3, 4, 2)\n            depth = depth.view(B, N, self.D, H, W)\n            bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                              int(self.grid_size[1]), int(self.grid_size[0]),\n                              feat.shape[-1])  # (B, Z, Y, X, C)\n            \n            bev_feat = bev_pool_v2(depth, feat, self.ranks_depth,\n                                   self.ranks_feat, self.ranks_bev,\n                                   bev_feat_shape, self.interval_starts,\n                                   self.interval_lengths)\n\n            bev_feat = bev_feat.squeeze(2)\n        else:\n            coor = self.get_lidar_coor(*input[1:7])\n            bev_feat = self.voxel_pooling_v2(\n                coor, depth.view(B, N, self.D, H, W),\n                tran_feat.view(B, N, self.out_channels, H, W))\n        return bev_feat, depth\n\n    def view_transform(self, input, depth, tran_feat):\n        if self.accelerate:\n            self.pre_compute(input)\n        return self.view_transform_core(input, depth, tran_feat)\n\n    def forward(self, input, return_depth_digit=False):\n        \"\"\"Transform image-view feature into bird-eye-view feature.\n\n        Args:\n            input (list(torch.tensor)): of 
(image-view feature, rots, trans,\n                intrins, post_rots, post_trans)\n\n        Returns:\n            torch.tensor: Bird-eye-view feature in shape (B, C, H_BEV, W_BEV)\n        \"\"\"\n        x = input[0]\n        B, N, C, H, W = x.shape\n        x = x.view(B * N, C, H, W)\n        if self.with_cp:\n            x = cp.checkpoint(self.depth_net, x)\n        else:\n            x = self.depth_net(x)            \n\n        depth_digit = x[:, :self.D, ...]\n        tran_feat = x[:, self.D:self.D + self.out_channels, ...]\n        if self.uniform:\n            depth_digit = depth_digit * 0\n            depth = depth_digit.softmax(dim=1)\n        else:\n            depth = depth_digit.softmax(dim=1)\n        if not return_depth_digit:\n            return self.view_transform(input, depth, tran_feat)\n        else:\n            return  self.view_transform(input, depth, tran_feat) + (depth_digit, )\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        return None\n\n\n\n\n@NECKS.register_module()\nclass LSSViewTransformer2(BaseModule):\n    r\"\"\"Lift-Splat-Shoot view transformer.\n\n    Please refer to the `paper <https://arxiv.org/abs/2008.05711>`_\n\n    Args:\n        grid_config (dict): Config of grid alone each axis in format of\n            (lower_bound, upper_bound, interval). axis in {x,y,z,depth}.\n        input_size (tuple(int)): Size of input images in format of (height,\n            width).\n        downsample (int): Down sample factor from the input size to the feature\n            size.\n        in_channels (int): Channels of input feature.\n        out_channels (int): Channels of transformed feature.\n        accelerate (bool): Whether the view transformation is conducted with\n            acceleration. 
Note: the intrinsic and extrinsic of cameras should\n            be constant when 'accelerate' is set true.\n    \"\"\"\n\n    def __init__(\n        self,\n        grid_config,\n        input_size,\n         downsample=16,\n        in_channels=512,\n        out_channels=64,\n        accelerate=False,\n        uniform=False,\n        with_cp=False\n    ):\n        super(LSSViewTransformer2, self).__init__()\n        self.uniform = uniform\n        self.with_cp = with_cp\n        self.grid_config = grid_config\n        self.downsample = downsample\n        self.create_grid_infos(**grid_config)\n        self.create_frustum(grid_config['depth'], input_size, downsample)\n        self.out_channels = out_channels\n        self.in_channels = in_channels\n        self.depth_net = nn.Conv2d(\n            in_channels, self.D + self.out_channels, kernel_size=1, padding=0)\n        self.accelerate = accelerate\n        self.initial_flag = True\n\n    def create_grid_infos(self, x, y, z, **kwargs):\n        \"\"\"Generate the grid information including the lower bound, interval,\n        and size.\n\n        Args:\n            x (tuple(float)): Config of grid alone x axis in format of\n                (lower_bound, upper_bound, interval).\n            y (tuple(float)): Config of grid alone y axis in format of\n                (lower_bound, upper_bound, interval).\n            z (tuple(float)): Config of grid alone z axis in format of\n                (lower_bound, upper_bound, interval).\n            **kwargs: Container for other potential parameters\n        \"\"\"\n        self.grid_lower_bound = torch.Tensor([cfg[0] for cfg in [x, y, z]])\n        self.grid_interval = torch.Tensor([cfg[2] for cfg in [x, y, z]])\n        self.grid_size = torch.Tensor([(cfg[1] - cfg[0]) / cfg[2]\n                                       for cfg in [x, y, z]])\n\n    def create_frustum(self, depth_cfg, input_size, downsample):\n        \"\"\"Generate the frustum template for each image.\n\n        Args:\n            depth_cfg (tuple(float)): Config of grid alone depth axis in format\n                of (lower_bound, upper_bound, interval).\n            input_size (tuple(int)): Size of input images in format of (height,\n                width).\n            downsample (int): Down sample scale factor from the input size to\n                the feature size.\n        \"\"\"\n        H_in, W_in = input_size\n        H_feat, W_feat = H_in // downsample, W_in // downsample\n        d = torch.arange(*depth_cfg, dtype=torch.float)\\\n            .view(-1, 1, 1).expand(-1, H_feat, W_feat)\n        self.D = d.shape[0]\n        x = torch.linspace(0, W_in - 1, W_feat,  dtype=torch.float)\\\n            .view(1, 1, W_feat).expand(self.D, H_feat, W_feat)\n        y = torch.linspace(0, H_in - 1, H_feat,  dtype=torch.float)\\\n            .view(1, H_feat, 1).expand(self.D, H_feat, W_feat)\n\n        # D x H x W x 3\n        self.frustum = torch.stack((x, y, d), -1)\n\n    def get_lidar_coor(self, rots, trans, cam2imgs, post_rots, post_trans,\n                       bda):\n        \"\"\"Calculate the locations of the frustum points in the lidar\n        coordinate system.\n\n        Args:\n            rots (torch.Tensor): Rotation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3, 3).\n            trans (torch.Tensor): Translation from camera coordinate system to\n                lidar coordinate system in shape (B, N_cams, 3).\n            cam2imgs (torch.Tensor): Camera intrinsic matrixes 
in shape\n                (B, N_cams, 3, 3).\n            post_rots (torch.Tensor): Rotation in camera coordinate system in\n                shape (B, N_cams, 3, 3). It is derived from the image view\n                augmentation.\n            post_trans (torch.Tensor): Translation in camera coordinate system\n                derived from image view augmentation in shape (B, N_cams, 3).\n\n        Returns:\n            torch.tensor: Point coordinates in shape\n                (B, N_cams, D, H, W, 3)\n        \"\"\"\n\n        B, N, _ = trans.shape\n\n        # post-transformation\n        # B x N x D x H x W x 3\n\n        points = self.frustum.to(rots) - post_trans.view(B, N, 1, 1, 1, 3)\n        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\\\n            .matmul(points.unsqueeze(-1))\n\n        # cam_to_ego\n        points = torch.cat(\n            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)\n        combine = rots.matmul(torch.inverse(cam2imgs))\n        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)\n        points += trans.view(B, N, 1, 1, 1, 3)\n        points = bda.view(B, 1, 1, 1, 1, 3,\n                          3).matmul(points.unsqueeze(-1)).squeeze(-1)\n        return points\n\n    def init_acceleration_v2(self, coor):\n        \"\"\"Pre-compute the necessary information in acceleration including the\n        index of points in the final feature.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in lidar space in shape\n                (B, N_cams, D, H, W, 3).\n        \"\"\"\n\n        kept, ranks_bev, ranks_depth, ranks_feat = \\\n            self.voxel_pooling_prepare_v2_inf(coor)\n\n        self.kept = kept\n        self.ranks_bev = ranks_bev.int().contiguous()\n        self.ranks_feat = ranks_feat.int().contiguous()\n        self.ranks_depth = ranks_depth.int().contiguous()\n        # self.interval_starts = interval_starts.int().contiguous()\n        # self.interval_lengths = interval_lengths.int().contiguous()\n\n    def voxel_pooling_v2(self, coor, depth, feat):\n        \"\"\"Pool the frustum features into the BEV grid via bev_pool_v2.\"\"\"\n        ranks_bev, ranks_depth, ranks_feat = \\\n            self.voxel_pooling_prepare_v2(depth, coor)\n\n        kept = torch.ones(\n            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)\n\n        kept[1:] = ranks_bev[1:] != ranks_bev[:-1]\n        interval_starts = torch.where(kept)[0].int()\n        if len(interval_starts) == 0:\n            return None, None, None, None, None\n        interval_lengths = torch.zeros_like(interval_starts)\n        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]\n        interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]\n\n        if ranks_feat is None:\n            print('warning ---> no points within the predefined '\n                  'bev receptive field')\n            dummy = torch.zeros(size=[\n                feat.shape[0], feat.shape[2],\n                int(self.grid_size[2]),\n                int(self.grid_size[0]),\n                int(self.grid_size[1])\n            ]).to(feat)\n            dummy = torch.cat(dummy.unbind(dim=2), 1)\n            return dummy\n        feat = feat.permute(0, 1, 3, 4, 2)\n        bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                          int(self.grid_size[1]), int(self.grid_size[0]),\n                          feat.shape[-1])  # (B, Z, Y, X, C)\n        
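# bev_pool_v2 (BEVPoolV2-style op): each retained frustum point weights its\n        # image feature by its depth probability and scatter-adds the result into\n        # the BEV cell addressed by ranks_bev; interval_starts/interval_lengths\n        # delimit the runs of points that fall into the same cell.\n        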
bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                               bev_feat_shape, interval_starts,\n                               interval_lengths)\n        # collapse Z\n        bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)\n        return bev_feat\n\n    def voxel_pooling_prepare_v2(self, depth, coor):\n        \"\"\"Data preparation for voxel pooling.\n\n        Args:\n            depth (torch.tensor): Predicted depth distribution in shape\n                (B, N, D, H, W), used to drop points with near-zero depth\n                probability.\n            coor (torch.tensor): Coordinate of points in the lidar space in\n                shape (B, N, D, H, W, 3).\n\n        Returns:\n            tuple[torch.tensor]: Rank of the voxel that a point belongs to\n                in shape (N_Points); Reserved index of points in the depth\n                space in shape (N_Points). Reserved index of points in the\n                feature space in shape (N_Points).\n        \"\"\"\n        B, N, D, H, W, _ = coor.shape\n        num_points = B * N * D * H * W\n        # record the index of selected points for acceleration purpose\n        ranks_depth = torch.range(\n            0, num_points - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = torch.range(\n            0, num_points // D - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)\n        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()\n\n        # convert coordinate into the voxel space\n        coor = ((coor - self.grid_lower_bound.to(coor)) /\n                self.grid_interval.to(coor))\n        coor = coor.long().view(num_points, 3)\n        batch_idx = torch.range(0, B - 1).reshape(B, 1). \\\n            expand(B, num_points // B).reshape(num_points, 1).to(coor)\n        coor = torch.cat((coor, batch_idx), 1)\n\n        # filter out points that are outside box\n        kept = (coor[:, 0] >= 0) & (coor[:, 0] < self.grid_size[0]) & \\\n               (coor[:, 1] >= 0) & (coor[:, 1] < self.grid_size[1]) & \\\n               (coor[:, 2] >= 0) & (coor[:, 2] < self.grid_size[2])\n\n        kept_depth = depth.view(-1) > 0.01\n        kept = kept & kept_depth\n\n        if len(kept) == 0:\n            return None, None, None, None, None\n        coor, ranks_depth, ranks_feat = \\\n            coor[kept], ranks_depth[kept], ranks_feat[kept]\n        # get tensors from the same voxel next to each other\n        ranks_bev = coor[:, 3] * (\n            self.grid_size[2] * self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0]\n\n        order = ranks_bev.argsort()\n        ranks_bev, ranks_depth, ranks_feat = \\\n            ranks_bev[order], ranks_depth[order], ranks_feat[order]\n\n        return ranks_bev.int().contiguous(), \\\n               ranks_depth.int().contiguous(), \\\n               ranks_feat.int().contiguous()\n        # interval_starts.int().contiguous()  # start index of the points in each voxel\n        # interval_lengths.int().contiguous()  # number of points accumulated in each voxel\n\n    def voxel_pooling_prepare_v2_inf(self, coor):\n        \"\"\"Data preparation for voxel pooling.\n\n        Args:\n            coor (torch.tensor): Coordinate of points in the lidar space in\n                shape (B, N, D, H, W, 3).\n\n        Returns:\n            tuple[torch.tensor]: Rank of the voxel that a point belongs to\n                in shape (N_Points); Reserved index of points in the depth\n                space in shape (N_Points). 
Reserved index of points in the\n                feature space in shape (N_Points).\n        \"\"\"\n        B, N, D, H, W, _ = coor.shape\n        num_points = B * N * D * H * W\n        # record the index of selected points for acceleration purpose\n        ranks_depth = torch.range(\n            0, num_points - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = torch.range(\n            0, num_points // D - 1, dtype=torch.int, device=coor.device)\n        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)\n        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()\n\n        # convert coordinate into the voxel space\n        coor = ((coor - self.grid_lower_bound.to(coor)) /\n                self.grid_interval.to(coor))\n        coor = coor.long().view(num_points, 3)\n        batch_idx = torch.range(0, B - 1).reshape(B, 1). \\\n            expand(B, num_points // B).reshape(num_points, 1).to(coor)\n        coor = torch.cat((coor, batch_idx), 1)\n\n        # filter out points that are outside box\n        kept = (coor[:, 0] >= 0) & (coor[:, 0] < self.grid_size[0]) & \\\n               (coor[:, 1] >= 0) & (coor[:, 1] < self.grid_size[1]) & \\\n               (coor[:, 2] >= 0) & (coor[:, 2] < self.grid_size[2])\n\n        if len(kept) == 0:\n            return None, None, None, None, None\n        coor, ranks_depth, ranks_feat = \\\n            coor[kept], ranks_depth[kept], ranks_feat[kept]\n        # get tensors from the same voxel next to each other\n        ranks_bev = coor[:, 3] * (\n            self.grid_size[2] * self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0])\n        ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0]\n\n        order = ranks_bev.argsort()\n        ranks_bev, ranks_depth, ranks_feat = \\\n            ranks_bev[order], ranks_depth[order], ranks_feat[order]\n\n        return kept, \\\n               ranks_bev.int().contiguous(), \\\n               ranks_depth.int().contiguous(), \\\n               ranks_feat.int().contiguous()\n        # interval_starts.int().contiguous()  # start index of the points in each voxel\n        # interval_lengths.int().contiguous()  # number of points accumulated in each voxel\n\n    def pre_compute(self, input):\n        if self.initial_flag:\n            coor = self.get_lidar_coor(*input[1:7])\n            self.init_acceleration_v2(coor)\n            self.initial_flag = False\n\n    def view_transform_core(self, input, depth, tran_feat):\n        B, N, C, H, W = input[0].shape\n\n        # Lift-Splat\n        if self.accelerate:\n            feat = tran_feat.view(B, N, self.out_channels, H, W)\n            feat = feat.permute(0, 1, 3, 4, 2)\n            depth = depth.view(B, N, self.D, H, W)\n            bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),\n                              int(self.grid_size[1]), int(self.grid_size[0]),\n                              feat.shape[-1])  # (B, Z, Y, X, C)\n\n            depth_kept = (depth.view(-1) > 0.01)[self.kept]\n            # print(depth_kept.sum()/self.kept.sum())\n            new_ranks_bev = self.ranks_bev[depth_kept].contiguous()\n            new_ranks_feat = self.ranks_feat[depth_kept].contiguous()\n            new_ranks_depth = self.ranks_depth[depth_kept].contiguous()\n\n            kept = torch.ones(\n                new_ranks_bev.shape[0], device=new_ranks_bev.device, dtype=torch.bool)\n\n            
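# Recompute run-length intervals over the depth-filtered ranks. Illustrative\n            # values (not from the source): new_ranks_bev = [0, 0, 3, 3, 3, 7] yields\n            # interval_starts = [0, 2, 5] and interval_lengths = [2, 3, 1].\n            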
kept[1:] = new_ranks_bev[1:] != new_ranks_bev[:-1]\n            interval_starts = torch.where(kept)[0].int()\n            interval_lengths = torch.zeros_like(interval_starts)\n            interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]\n            interval_lengths[-1] = new_ranks_bev.shape[0] - interval_starts[-1]\n\n            bev_feat = bev_pool_v2(depth, feat, new_ranks_depth,\n                                   new_ranks_feat, new_ranks_bev,\n                                   bev_feat_shape, interval_starts.int().contiguous(),\n                                   interval_lengths.int().contiguous())\n\n            bev_feat = bev_feat.squeeze(2)\n        else:\n            coor = self.get_lidar_coor(*input[1:7])\n            bev_feat = self.voxel_pooling_v2(\n                coor, depth.view(B, N, self.D, H, W),\n                tran_feat.view(B, N, self.out_channels, H, W))\n        return bev_feat, depth\n\n    def view_transform(self, input, depth, tran_feat):\n        if self.accelerate:\n            self.pre_compute(input)\n        return self.view_transform_core(input, depth, tran_feat)\n\n    def forward(self, input, return_depth_digit=False):\n        \"\"\"Transform image-view feature into bird-eye-view feature.\n\n        Args:\n            input (list(torch.tensor)): of (image-view feature, rots, trans,\n                intrins, post_rots, post_trans)\n\n        Returns:\n            torch.tensor: Bird-eye-view feature in shape (B, C, H_BEV, W_BEV)\n        \"\"\"\n        x = input[0]\n        B, N, C, H, W = x.shape\n        x = x.view(B * N, C, H, W)\n        if self.with_cp:\n            x = cp.checkpoint(self.depth_net, x)\n        else:\n            x = self.depth_net(x)            \n\n        depth_digit = x[:, :self.D, ...]\n        tran_feat = x[:, self.D:self.D + self.out_channels, ...]\n        if self.uniform:\n            depth_digit = depth_digit * 0\n            depth = depth_digit.softmax(dim=1)\n        else:\n            depth = depth_digit.softmax(dim=1)\n        if not return_depth_digit:\n            return self.view_transform(input, depth, tran_feat)\n        else:\n            return  self.view_transform(input, depth, tran_feat) + (depth_digit, )\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        return None\n\n\n\nclass _ASPPModule(nn.Module):\n\n    def __init__(self, inplanes, planes, kernel_size, padding, dilation,\n                 BatchNorm):\n        super(_ASPPModule, self).__init__()\n        self.atrous_conv = nn.Conv2d(\n            inplanes,\n            planes,\n            kernel_size=kernel_size,\n            stride=1,\n            padding=padding,\n            dilation=dilation,\n            bias=False)\n        self.bn = BatchNorm(planes)\n        self.relu = nn.ReLU()\n\n        self._init_weight()\n\n    def forward(self, x):\n        x = self.atrous_conv(x)\n        x = self.bn(x)\n\n        return self.relu(x)\n\n    def _init_weight(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                torch.nn.init.kaiming_normal_(m.weight)\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n\nclass ASPP(nn.Module):\n\n    def __init__(self, inplanes, mid_channels=256, BatchNorm=nn.BatchNorm2d):\n        super(ASPP, self).__init__()\n\n        dilations = [1, 6, 12, 18]\n\n        self.aspp1 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            1,\n  
          padding=0,\n            dilation=dilations[0],\n            BatchNorm=BatchNorm)\n        self.aspp2 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            3,\n            padding=dilations[1],\n            dilation=dilations[1],\n            BatchNorm=BatchNorm)\n        self.aspp3 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            3,\n            padding=dilations[2],\n            dilation=dilations[2],\n            BatchNorm=BatchNorm)\n        self.aspp4 = _ASPPModule(\n            inplanes,\n            mid_channels,\n            3,\n            padding=dilations[3],\n            dilation=dilations[3],\n            BatchNorm=BatchNorm)\n\n        self.global_avg_pool = nn.Sequential(\n            nn.AdaptiveAvgPool2d((1, 1)),\n            nn.Conv2d(inplanes, mid_channels, 1, stride=1, bias=False),\n            BatchNorm(mid_channels),\n            nn.ReLU(),\n        )\n        self.conv1 = nn.Conv2d(\n            int(mid_channels * 5), inplanes, 1, bias=False)\n        self.bn1 = BatchNorm(inplanes)\n        self.relu = nn.ReLU()\n        self.dropout = nn.Dropout(0.5)\n        self._init_weight()\n\n    def forward(self, x):\n        x1 = self.aspp1(x)\n        x2 = self.aspp2(x)\n        x3 = self.aspp3(x)\n        x4 = self.aspp4(x)\n        x5 = self.global_avg_pool(x)\n        x5 = F.interpolate(\n            x5, size=x4.size()[2:], mode='bilinear', align_corners=True)\n        x = torch.cat((x1, x2, x3, x4, x5), dim=1)\n\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n\n        return self.dropout(x)\n\n    def _init_weight(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                torch.nn.init.kaiming_normal_(m.weight)\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n\nclass Mlp(nn.Module):\n\n    def __init__(self,\n                 in_features,\n                 hidden_features=None,\n                 out_features=None,\n                 act_layer=nn.ReLU,\n                 drop=0.0):\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n        self.fc1 = nn.Linear(in_features, hidden_features)\n        self.act = act_layer()\n        self.drop1 = nn.Dropout(drop)\n        self.fc2 = nn.Linear(hidden_features, out_features)\n        self.drop2 = nn.Dropout(drop)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.act(x)\n        x = self.drop1(x)\n        x = self.fc2(x)\n        x = self.drop2(x)\n        return x\n\n\nclass SELayer(nn.Module):\n\n    def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):\n        super().__init__()\n        self.conv_reduce = nn.Conv2d(channels, channels, 1, bias=True)\n        self.act1 = act_layer()\n        self.conv_expand = nn.Conv2d(channels, channels, 1, bias=True)\n        self.gate = gate_layer()\n\n    def forward(self, x, x_se):\n        x_se = self.conv_reduce(x_se)\n        x_se = self.act1(x_se)\n        x_se = self.conv_expand(x_se)\n        return x * self.gate(x_se)\n\n\nclass DepthNet(nn.Module):\n\n    def __init__(self,\n                 in_channels,\n                 mid_channels,\n                 context_channels,\n                 depth_channels,\n                 use_dcn=True,\n                 use_aspp=True,\n                 aspp_mid_channels=-1\n                 ):\n        
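# Camera-aware depth estimation head (BEVDepth-style): the flattened camera\n        # parameters (mlp_input) modulate the depth and context branches via SE\n        # attention (depth_se / context_se) before the final depth logits.\n        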
super(DepthNet, self).__init__()\n        self.reduce_conv = nn.Sequential(\n            nn.Conv2d(\n                in_channels, mid_channels, kernel_size=3, stride=1, padding=1),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n        )\n        self.context_conv = nn.Conv2d(\n            mid_channels, context_channels, kernel_size=1, stride=1, padding=0)\n        self.bn = nn.BatchNorm1d(27)\n        self.depth_mlp = Mlp(27, mid_channels, mid_channels)\n        self.depth_se = SELayer(mid_channels)  # NOTE: add camera-aware\n        self.context_mlp = Mlp(27, mid_channels, mid_channels)\n        self.context_se = SELayer(mid_channels)  # NOTE: add camera-aware\n        depth_conv_list = [\n            BasicBlock(mid_channels, mid_channels),\n            BasicBlock(mid_channels, mid_channels),\n            BasicBlock(mid_channels, mid_channels),\n        ]\n        if use_aspp:\n            if aspp_mid_channels<0:\n                aspp_mid_channels = mid_channels\n            depth_conv_list.append(ASPP(mid_channels, aspp_mid_channels))\n        if use_dcn:\n            depth_conv_list.append(\n                build_conv_layer(\n                    cfg=dict(\n                        type='DCN',\n                        in_channels=mid_channels,\n                        out_channels=mid_channels,\n                        kernel_size=3,\n                        padding=1,\n                        groups=4,\n                        im2col_step=128,\n                    )))\n        depth_conv_list.append(\n            nn.Conv2d(\n                mid_channels,\n                depth_channels,\n                kernel_size=1,\n                stride=1,\n                padding=0))\n        self.depth_conv = nn.Sequential(*depth_conv_list)\n\n    def forward(self, x, mlp_input):\n        mlp_input = self.bn(mlp_input.reshape(-1, mlp_input.shape[-1]))\n        x = self.reduce_conv(x)\n        context_se = self.context_mlp(mlp_input)[..., None, None]\n        context = self.context_se(x, context_se)\n        context = self.context_conv(context)\n        depth_se = self.depth_mlp(mlp_input)[..., None, None]\n        depth = self.depth_se(x, depth_se)\n        depth = self.depth_conv(depth)\n        return torch.cat([depth, context], dim=1)\n\n\nclass DepthAggregation(nn.Module):\n    \"\"\"pixel cloud feature extraction.\"\"\"\n\n    def __init__(self, in_channels, mid_channels, out_channels):\n        super(DepthAggregation, self).__init__()\n\n        self.reduce_conv = nn.Sequential(\n            nn.Conv2d(\n                in_channels,\n                mid_channels,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=False),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n        )\n\n        self.conv = nn.Sequential(\n            nn.Conv2d(\n                mid_channels,\n                mid_channels,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=False),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(\n                mid_channels,\n                mid_channels,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=False),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n        )\n\n        self.out_conv = nn.Sequential(\n            nn.Conv2d(\n                
mid_channels,\n                out_channels,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=True),\n            # nn.BatchNorm3d(out_channels),\n            # nn.ReLU(inplace=True),\n        )\n\n    @autocast(False)\n    def forward(self, x):\n        x = checkpoint(self.reduce_conv, x)\n        short_cut = x\n        x = checkpoint(self.conv, x)\n        x = short_cut + x\n        x = self.out_conv(x)\n        return x\nimport numpy as np\n\n@NECKS.register_module()\nclass LSSViewTransformerBEVDepth(LSSViewTransformer2):\n\n    def __init__(self, loss_depth_weight=3.0, depthnet_cfg=dict(), with_cp=False, **kwargs):\n        super(LSSViewTransformerBEVDepth, self).__init__(**kwargs)\n        self.with_cp = with_cp\n        self.loss_depth_weight = loss_depth_weight\n        self.depth_net = DepthNet(self.in_channels, self.in_channels,\n                                  self.out_channels, self.D, **depthnet_cfg)\n\n    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):\n        B, N, _, _ = rot.shape\n        bda = bda.view(B, 1, 3, 3).repeat(1, N, 1, 1)\n        mlp_input = torch.stack([\n            intrin[:, :, 0, 0],\n            intrin[:, :, 1, 1],\n            intrin[:, :, 0, 2],\n            intrin[:, :, 1, 2],\n            post_rot[:, :, 0, 0],\n            post_rot[:, :, 0, 1],\n            post_tran[:, :, 0],\n            post_rot[:, :, 1, 0],\n            post_rot[:, :, 1, 1],\n            post_tran[:, :, 1],\n            bda[:, :, 0, 0],\n            bda[:, :, 0, 1],\n            bda[:, :, 1, 0],\n            bda[:, :, 1, 1],\n            bda[:, :, 2, 2],\n        ],\n                                dim=-1)\n        sensor2ego = torch.cat([rot, tran.reshape(B, N, 3, 1)],\n                               dim=-1).reshape(B, N, -1)\n        mlp_input = torch.cat([mlp_input, sensor2ego], dim=-1)\n        return mlp_input\n\n    def get_downsampled_gt_depth(self, gt_depths):\n        \"\"\"\n        Input:\n            gt_depths: [B, N, H, W]\n        Output:\n            gt_depths: [B*N*h*w, d]\n        \"\"\"\n        B, N, H, W = gt_depths.shape\n        gt_depths = gt_depths.view(B * N, H // self.downsample,\n                                   self.downsample, W // self.downsample,\n                                   self.downsample, 1)\n        gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous()\n        gt_depths = gt_depths.view(-1, self.downsample * self.downsample)\n        gt_depths_tmp = torch.where(gt_depths == 0.0,\n                                    1e5 * torch.ones_like(gt_depths),\n                                    gt_depths)\n        gt_depths = torch.min(gt_depths_tmp, dim=-1).values\n        gt_depths = gt_depths.view(B * N, H // self.downsample,\n                                   W // self.downsample)\n\n        gt_depths = (\n            gt_depths -\n            (self.grid_config['depth'][0] -\n             self.grid_config['depth'][2])) / self.grid_config['depth'][2]\n        gt_depths = torch.where((gt_depths < self.D + 1) & (gt_depths >= 0.0),\n                                gt_depths, torch.zeros_like(gt_depths))\n        gt_depths = F.one_hot(\n            gt_depths.long(), num_classes=self.D + 1).view(-1, self.D + 1)[:,\n                                                                           1:]\n        return gt_depths.float()\n\n    @force_fp32()\n    def get_depth_loss(self, depth_labels, depth_preds):\n        depth_labels = 
self.get_downsampled_gt_depth(depth_labels)\n        depth_preds = depth_preds.permute(0, 2, 3,\n                                          1).contiguous().view(-1, self.D)\n        fg_mask = torch.max(depth_labels, dim=1).values > 0.0\n        depth_labels = depth_labels[fg_mask]\n        depth_preds = depth_preds[fg_mask]\n        with autocast(enabled=False):\n            depth_loss = F.binary_cross_entropy(\n                depth_preds,\n                depth_labels,\n                reduction='none',\n            ).sum() / max(1.0, fg_mask.sum())\n        return self.loss_depth_weight * depth_loss\n\n    def forward(self, input, return_depth_digit=False):\n        (x, rots, trans, intrins, post_rots, post_trans, bda,\n         mlp_input) = input[:8]\n\n        B, N, C, H, W = x.shape\n        x = x.view(B * N, C, H, W)\n        if self.with_cp:\n            x = cp.checkpoint(self.depth_net, x, mlp_input)\n        else:\n            x = self.depth_net(x, mlp_input)\n        depth_digit = x[:, :self.D, ...]\n        tran_feat = x[:, self.D:self.D + self.out_channels, ...]\n        depth = depth_digit.softmax(dim=1)\n        # from IPython import embed\n        # embed()\n        # exit()\n        # depth[depth<0.01] = 0\n        # self.counter.append(((depth<0.01).sum()/depth.numel()).item())\n\n        if return_depth_digit:\n            return self.view_transform(input, depth, tran_feat) + (depth_digit, )\n        else:\n            return self.view_transform(input, depth, tran_feat)\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_3droi_head import Base3DRoIHead\nfrom .bbox_heads import H3DBboxHead, PartA2BboxHead, PointRCNNBboxHead\nfrom .h3d_roi_head import H3DRoIHead\nfrom .mask_heads import PointwiseSemanticHead, PrimitiveHead\nfrom .part_aggregation_roi_head import PartAggregationROIHead\nfrom .point_rcnn_roi_head import PointRCNNRoIHead\nfrom .roi_extractors import (Single3DRoIAwareExtractor,\n                             Single3DRoIPointExtractor, SingleRoIExtractor)\n\n__all__ = [\n    'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',\n    'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',\n    'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead', 'H3DBboxHead',\n    'PointRCNNBboxHead', 'Single3DRoIPointExtractor'\n]\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/base_3droi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.runner import BaseModule\n\n\nclass Base3DRoIHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for 3d RoIHeads.\"\"\"\n\n    def __init__(self,\n                 bbox_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(Base3DRoIHead, self).__init__(init_cfg=init_cfg)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        if bbox_head is not None:\n            self.init_bbox_head(bbox_head)\n\n        if mask_head is not None:\n            self.init_mask_head(mask_roi_extractor, mask_head)\n\n        self.init_assigner_sampler()\n\n    @property\n    def with_bbox(self):\n        \"\"\"bool: whether the RoIHead has box head\"\"\"\n        return hasattr(self, 'bbox_head') and self.bbox_head is not None\n\n    @property\n    def with_mask(self):\n        \"\"\"bool: whether the RoIHead has mask head\"\"\"\n        return hasattr(self, 'mask_head') and self.mask_head is not None\n\n    @abstractmethod\n    def init_bbox_head(self):\n        \"\"\"Initialize the box head.\"\"\"\n        pass\n\n    @abstractmethod\n    def init_mask_head(self):\n        \"\"\"Initialize maek head.\"\"\"\n        pass\n\n    @abstractmethod\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        pass\n\n    @abstractmethod\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"Forward function during training.\n\n        Args:\n            x (dict): Contains features from the first stage.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]):\n                GT bboxes of each sample. The bboxes are encapsulated\n                by 3D box structures.\n            gt_labels (list[torch.LongTensor]): GT labels of each sample.\n            gt_bboxes_ignore (list[torch.Tensor], optional):\n                Ground truth boxes to be ignored.\n\n        Returns:\n            dict[str, torch.Tensor]: Losses from each head.\n        \"\"\"\n        pass\n\n    def simple_test(self,\n                    x,\n                    proposal_list,\n                    img_metas,\n                    proposals=None,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test without augmentation.\"\"\"\n        pass\n\n    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        pass\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,\n                                               DoubleConvFCBBoxHead,\n                                               Shared2FCBBoxHead,\n                                               Shared4Conv1FCBBoxHead)\nfrom .h3d_bbox_head import H3DBboxHead\nfrom .parta2_bbox_head import PartA2BboxHead\nfrom .point_rcnn_bbox_head import PointRCNNBboxHead\n\n__all__ = [\n    'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',\n    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',\n    'H3DBboxHead', 'PointRCNNBboxHead'\n]\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet3d.core.post_processing import aligned_3d_nms\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet3d.models.losses import chamfer_distance\nfrom mmdet3d.ops import build_sa_module\nfrom mmdet.core import build_bbox_coder, multi_apply\n\n\n@HEADS.register_module()\nclass H3DBboxHead(BaseModule):\n    r\"\"\"Bbox head of `H3DNet <https://arxiv.org/abs/2006.05682>`_.\n\n    Args:\n        num_classes (int): The number of classes.\n        surface_matching_cfg (dict): Config for surface primitive matching.\n        line_matching_cfg (dict): Config for line primitive matching.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        gt_per_seed (int): Number of ground truth votes generated\n            from each seed point.\n        num_proposal (int): Number of proposal votes generated.\n        feat_channels (tuple[int]): Convolution channels of\n            prediction layer.\n        primitive_feat_refine_streams (int): The number of mlps to\n            refine primitive feature.\n        primitive_refine_channels (tuple[int]): Convolution channels of\n            prediction layer.\n        upper_thresh (float): Threshold for line matching.\n        surface_thresh (float): Threshold for surface matching.\n        line_thresh (float): Threshold for line matching.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_class_loss (dict): Config of size classification loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n        cues_objectness_loss (dict): Config of cues objectness loss.\n        cues_semantic_loss (dict): Config of cues semantic loss.\n        proposal_objectness_loss (dict): Config of proposal objectness\n            loss.\n        primitive_center_loss (dict): Config of primitive center regression\n            loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 suface_matching_cfg,\n                 line_matching_cfg,\n                 bbox_coder,\n                 train_cfg=None,\n                 test_cfg=None,\n                 gt_per_seed=1,\n                 num_proposal=256,\n                 feat_channels=(128, 128),\n                 primitive_feat_refine_streams=2,\n                 primitive_refine_channels=[128, 128, 128],\n                 upper_thresh=100.0,\n                 surface_thresh=0.5,\n                 line_thresh=0.5,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_class_loss=None,\n                 
size_res_loss=None,\n                 semantic_loss=None,\n                 cues_objectness_loss=None,\n                 cues_semantic_loss=None,\n                 proposal_objectness_loss=None,\n                 primitive_center_loss=None,\n                 init_cfg=None):\n        super(H3DBboxHead, self).__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.gt_per_seed = gt_per_seed\n        self.num_proposal = num_proposal\n        self.with_angle = bbox_coder['with_rot']\n        self.upper_thresh = upper_thresh\n        self.surface_thresh = surface_thresh\n        self.line_thresh = line_thresh\n\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.dir_class_loss = build_loss(dir_class_loss)\n        self.dir_res_loss = build_loss(dir_res_loss)\n        self.size_class_loss = build_loss(size_class_loss)\n        self.size_res_loss = build_loss(size_res_loss)\n        self.semantic_loss = build_loss(semantic_loss)\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_sizes = self.bbox_coder.num_sizes\n        self.num_dir_bins = self.bbox_coder.num_dir_bins\n\n        self.cues_objectness_loss = build_loss(cues_objectness_loss)\n        self.cues_semantic_loss = build_loss(cues_semantic_loss)\n        self.proposal_objectness_loss = build_loss(proposal_objectness_loss)\n        self.primitive_center_loss = build_loss(primitive_center_loss)\n\n        assert suface_matching_cfg['mlp_channels'][-1] == \\\n            line_matching_cfg['mlp_channels'][-1]\n\n        # surface center matching\n        self.surface_center_matcher = build_sa_module(suface_matching_cfg)\n        # line center matching\n        self.line_center_matcher = build_sa_module(line_matching_cfg)\n\n        # Compute the matching scores\n        matching_feat_dims = suface_matching_cfg['mlp_channels'][-1]\n        self.matching_conv = ConvModule(\n            matching_feat_dims,\n            matching_feat_dims,\n            1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=True,\n            inplace=True)\n        self.matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)\n\n        # Compute the semantic matching scores\n        self.semantic_matching_conv = ConvModule(\n            matching_feat_dims,\n            matching_feat_dims,\n            1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=True,\n            inplace=True)\n        self.semantic_matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)\n\n        # Surface feature aggregation\n        self.surface_feats_aggregation = list()\n        for k in range(primitive_feat_refine_streams):\n            self.surface_feats_aggregation.append(\n                ConvModule(\n                    matching_feat_dims,\n                    matching_feat_dims,\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=True))\n        self.surface_feats_aggregation = nn.Sequential(\n            *self.surface_feats_aggregation)\n\n        # Line feature aggregation\n        self.line_feats_aggregation = list()\n        for k in range(primitive_feat_refine_streams):\n            self.line_feats_aggregation.append(\n                
ConvModule(\n                    matching_feat_dims,\n                    matching_feat_dims,\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=True))\n        self.line_feats_aggregation = nn.Sequential(\n            *self.line_feats_aggregation)\n\n        # surface center(6) + line center(12)\n        prev_channel = 18 * matching_feat_dims\n        self.bbox_pred = nn.ModuleList()\n        for k in range(len(primitive_refine_channels)):\n            self.bbox_pred.append(\n                ConvModule(\n                    prev_channel,\n                    primitive_refine_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=False))\n            prev_channel = primitive_refine_channels[k]\n\n        # Final object detection\n        # Objectness scores (2), center residual (3),\n        # heading class+residual (num_heading_bin*2), size class +\n        # residual(num_size_cluster*4)\n        conv_out_channel = (2 + 3 + bbox_coder['num_dir_bins'] * 2 +\n                            bbox_coder['num_sizes'] * 4 + self.num_classes)\n        self.bbox_pred.append(nn.Conv1d(prev_channel, conv_out_channel, 1))\n\n    def forward(self, feats_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Args:\n            feats_dict (dict): Feature dict from backbone.\n            sample_mod (str): Sample mode for vote aggregation layer.\n                valid modes are \"vote\", \"seed\" and \"random\".\n\n        Returns:\n            dict: Predictions of vote head.\n        \"\"\"\n        ret_dict = {}\n        aggregated_points = feats_dict['aggregated_points']\n        original_feature = feats_dict['aggregated_features']\n        batch_size = original_feature.shape[0]\n        object_proposal = original_feature.shape[2]\n\n        # Extract surface center, features and semantic predictions\n        z_center = feats_dict['pred_z_center']\n        xy_center = feats_dict['pred_xy_center']\n        z_semantic = feats_dict['sem_cls_scores_z']\n        xy_semantic = feats_dict['sem_cls_scores_xy']\n        z_feature = feats_dict['aggregated_features_z']\n        xy_feature = feats_dict['aggregated_features_xy']\n        # Extract line points and features\n        line_center = feats_dict['pred_line_center']\n        line_feature = feats_dict['aggregated_features_line']\n\n        surface_center_pred = torch.cat((z_center, xy_center), dim=1)\n        ret_dict['surface_center_pred'] = surface_center_pred\n        ret_dict['surface_sem_pred'] = torch.cat((z_semantic, xy_semantic),\n                                                 dim=1)\n\n        # Extract the surface and line centers of rpn proposals\n        rpn_proposals = feats_dict['proposal_list']\n        rpn_proposals_bbox = DepthInstance3DBoxes(\n            rpn_proposals.reshape(-1, 7).clone(),\n            box_dim=rpn_proposals.shape[-1],\n            with_yaw=self.with_angle,\n            origin=(0.5, 0.5, 0.5))\n\n        obj_surface_center, obj_line_center = \\\n            rpn_proposals_bbox.get_surface_line_center()\n        obj_surface_center = obj_surface_center.reshape(\n            batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)\n        obj_line_center = obj_line_center.reshape(batch_size, -1, 12,\n                  
                                3).transpose(1, 2).reshape(\n                                                      batch_size, -1, 3)\n        ret_dict['surface_center_object'] = obj_surface_center\n        ret_dict['line_center_object'] = obj_line_center\n\n        # aggregate primitive z and xy features to rpn proposals\n        surface_center_feature_pred = torch.cat((z_feature, xy_feature), dim=2)\n        surface_center_feature_pred = torch.cat(\n            (surface_center_feature_pred.new_zeros(\n                (batch_size, 6, surface_center_feature_pred.shape[2])),\n             surface_center_feature_pred),\n            dim=1)\n\n        surface_xyz, surface_features, _ = self.surface_center_matcher(\n            surface_center_pred,\n            surface_center_feature_pred,\n            target_xyz=obj_surface_center)\n\n        # aggregate primitive line features to rpn proposals\n        line_feature = torch.cat((line_feature.new_zeros(\n            (batch_size, 12, line_feature.shape[2])), line_feature),\n                                 dim=1)\n        line_xyz, line_features, _ = self.line_center_matcher(\n            line_center, line_feature, target_xyz=obj_line_center)\n\n        # combine the surface and line features\n        combine_features = torch.cat((surface_features, line_features), dim=2)\n\n        matching_features = self.matching_conv(combine_features)\n        matching_score = self.matching_pred(matching_features)\n        ret_dict['matching_score'] = matching_score.transpose(2, 1)\n\n        semantic_matching_features = self.semantic_matching_conv(\n            combine_features)\n        semantic_matching_score = self.semantic_matching_pred(\n            semantic_matching_features)\n        ret_dict['semantic_matching_score'] = \\\n            semantic_matching_score.transpose(2, 1)\n\n        surface_features = self.surface_feats_aggregation(surface_features)\n        line_features = self.line_feats_aggregation(line_features)\n\n        # Combine all surface and line features\n        surface_features = surface_features.view(batch_size, -1,\n                                                 object_proposal)\n        line_features = line_features.view(batch_size, -1, object_proposal)\n\n        combine_feature = torch.cat((surface_features, line_features), dim=1)\n\n        # Final bbox predictions\n        bbox_predictions = self.bbox_pred[0](combine_feature)\n        bbox_predictions += original_feature\n        for conv_module in self.bbox_pred[1:]:\n            bbox_predictions = conv_module(bbox_predictions)\n\n        refine_decode_res = self.bbox_coder.split_pred(\n            bbox_predictions[:, :self.num_classes + 2],\n            bbox_predictions[:, self.num_classes + 2:], aggregated_points)\n        for key in refine_decode_res.keys():\n            ret_dict[key + '_optimized'] = refine_decode_res[key]\n        return ret_dict\n\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             rpn_targets=None,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of h3d bbox head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d 
(list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            rpn_targets (Tuple) : Targets generated by rpn head.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses of H3dnet.\n        \"\"\"\n        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,\n         dir_class_targets, dir_res_targets, center_targets, _, mask_targets,\n         valid_gt_masks, objectness_targets, objectness_weights,\n         box_loss_weights, valid_gt_weights) = rpn_targets\n\n        losses = {}\n\n        # calculate refined proposal loss\n        refined_proposal_loss = self.get_proposal_stage_loss(\n            bbox_preds,\n            size_class_targets,\n            size_res_targets,\n            dir_class_targets,\n            dir_res_targets,\n            center_targets,\n            mask_targets,\n            objectness_targets,\n            objectness_weights,\n            box_loss_weights,\n            valid_gt_weights,\n            suffix='_optimized')\n        for key in refined_proposal_loss.keys():\n            losses[key + '_optimized'] = refined_proposal_loss[key]\n\n        bbox3d_optimized = self.bbox_coder.decode(\n            bbox_preds, suffix='_optimized')\n\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n\n        (cues_objectness_label, cues_sem_label, proposal_objectness_label,\n         cues_mask, cues_match_mask, proposal_objectness_mask,\n         cues_matching_label, obj_surface_line_center) = targets\n\n        # match scores for each geometric primitive\n        objectness_scores = bbox_preds['matching_score']\n        # match scores for the semantics of primitives\n        objectness_scores_sem = bbox_preds['semantic_matching_score']\n\n        primitive_objectness_loss = self.cues_objectness_loss(\n            objectness_scores.transpose(2, 1),\n            cues_objectness_label,\n            weight=cues_mask,\n            avg_factor=cues_mask.sum() + 1e-6)\n\n        primitive_sem_loss = self.cues_semantic_loss(\n            objectness_scores_sem.transpose(2, 1),\n            cues_sem_label,\n            weight=cues_mask,\n            avg_factor=cues_mask.sum() + 1e-6)\n\n        objectness_scores = bbox_preds['obj_scores_optimized']\n        objectness_loss_refine = self.proposal_objectness_loss(\n            objectness_scores.transpose(2, 1), proposal_objectness_label)\n        primitive_matching_loss = (objectness_loss_refine *\n                                   cues_match_mask).sum() / (\n                                       cues_match_mask.sum() + 1e-6) * 0.5\n        primitive_sem_matching_loss = (\n            objectness_loss_refine * proposal_objectness_mask).sum() / (\n                proposal_objectness_mask.sum() + 1e-6) * 0.5\n\n        # Get the object surface center here\n        batch_size, object_proposal = bbox3d_optimized.shape[:2]\n        refined_bbox = DepthInstance3DBoxes(\n            bbox3d_optimized.reshape(-1, 7).clone(),\n            box_dim=bbox3d_optimized.shape[-1],\n            with_yaw=self.with_angle,\n            origin=(0.5, 0.5, 
0.5))\n\n        pred_obj_surface_center, pred_obj_line_center = \\\n            refined_bbox.get_surface_line_center()\n        pred_obj_surface_center = pred_obj_surface_center.reshape(\n            batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)\n        pred_obj_line_center = pred_obj_line_center.reshape(\n            batch_size, -1, 12, 3).transpose(1, 2).reshape(batch_size, -1, 3)\n        pred_surface_line_center = torch.cat(\n            (pred_obj_surface_center, pred_obj_line_center), 1)\n\n        square_dist = self.primitive_center_loss(pred_surface_line_center,\n                                                 obj_surface_line_center)\n\n        match_dist = torch.sqrt(square_dist.sum(dim=-1) + 1e-6)\n        primitive_centroid_reg_loss = torch.sum(\n            match_dist * cues_matching_label) / (\n                cues_matching_label.sum() + 1e-6)\n\n        refined_loss = dict(\n            primitive_objectness_loss=primitive_objectness_loss,\n            primitive_sem_loss=primitive_sem_loss,\n            primitive_matching_loss=primitive_matching_loss,\n            primitive_sem_matching_loss=primitive_sem_matching_loss,\n            primitive_centroid_reg_loss=primitive_centroid_reg_loss)\n\n        losses.update(refined_loss)\n\n        return losses\n\n    def get_bboxes(self,\n                   points,\n                   bbox_preds,\n                   input_metas,\n                   rescale=False,\n                   suffix=''):\n        \"\"\"Generate bboxes from vote head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from vote head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # decode boxes\n        obj_scores = F.softmax(\n            bbox_preds['obj_scores' + suffix], dim=-1)[..., -1]\n\n        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)\n\n        prediction_collection = {}\n        prediction_collection['center'] = bbox_preds['center' + suffix]\n        prediction_collection['dir_class'] = bbox_preds['dir_class']\n        prediction_collection['dir_res'] = bbox_preds['dir_res' + suffix]\n        prediction_collection['size_class'] = bbox_preds['size_class']\n        prediction_collection['size_res'] = bbox_preds['size_res' + suffix]\n\n        bbox3d = self.bbox_coder.decode(prediction_collection)\n\n        batch_size = bbox3d.shape[0]\n        results = list()\n        for b in range(batch_size):\n            bbox_selected, score_selected, labels = self.multiclass_nms_single(\n                obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],\n                input_metas[b])\n            bbox = input_metas[b]['box_type_3d'](\n                bbox_selected,\n                box_dim=bbox_selected.shape[-1],\n                with_yaw=self.bbox_coder.with_rot)\n            results.append((bbox, score_selected, labels))\n\n        return results\n\n    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n   
         points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        bbox = input_meta['box_type_3d'](\n            bbox,\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        box_indices = bbox.points_in_boxes_all(points)\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        nonempty_box_mask = box_indices.T.sum(1) > 5\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],\n                                      obj_scores[nonempty_box_mask],\n                                      bbox_classes[nonempty_box_mask],\n                                      self.test_cfg.nms_thr)\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores > self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_selected], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n            for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected] *\n                                      sem_scores[selected][:, k])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n\n    def get_proposal_stage_loss(self,\n                                bbox_preds,\n                                size_class_targets,\n                                size_res_targets,\n                                dir_class_targets,\n                                dir_res_targets,\n                                center_targets,\n                                mask_targets,\n                                objectness_targets,\n                                objectness_weights,\n                                box_loss_weights,\n                                valid_gt_weights,\n                                suffix=''):\n        \"\"\"Compute loss for the aggregation module.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of vote head.\n            size_class_targets (torch.Tensor): Ground truth\n                size class of each prediction bounding box.\n            size_res_targets (torch.Tensor): Ground truth\n                size residual of each prediction bounding box.\n            dir_class_targets (torch.Tensor): Ground truth\n                direction class of each prediction bounding box.\n            dir_res_targets (torch.Tensor): Ground truth\n                direction 
residual of each prediction bounding box.\n            center_targets (torch.Tensor): Ground truth center\n                of each prediction bounding box.\n            mask_targets (torch.Tensor): Validation of each\n                prediction bounding box.\n            objectness_targets (torch.Tensor): Ground truth\n                objectness label of each prediction bounding box.\n            objectness_weights (torch.Tensor): Weights of objectness\n                loss for each prediction bounding box.\n            box_loss_weights (torch.Tensor): Weights of regression\n                loss for each prediction bounding box.\n            valid_gt_weights (torch.Tensor): Validation of each\n                ground truth bounding box.\n\n        Returns:\n            dict: Losses of aggregation module.\n        \"\"\"\n        # calculate objectness loss\n        objectness_loss = self.objectness_loss(\n            bbox_preds['obj_scores' + suffix].transpose(2, 1),\n            objectness_targets,\n            weight=objectness_weights)\n\n        # calculate center loss\n        source2target_loss, target2source_loss = self.center_loss(\n            bbox_preds['center' + suffix],\n            center_targets,\n            src_weight=box_loss_weights,\n            dst_weight=valid_gt_weights)\n        center_loss = source2target_loss + target2source_loss\n\n        # calculate direction class loss\n        dir_class_loss = self.dir_class_loss(\n            bbox_preds['dir_class' + suffix].transpose(2, 1),\n            dir_class_targets,\n            weight=box_loss_weights)\n\n        # calculate direction residual loss\n        batch_size, proposal_num = size_class_targets.shape[:2]\n        heading_label_one_hot = dir_class_targets.new_zeros(\n            (batch_size, proposal_num, self.num_dir_bins))\n        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)\n        dir_res_norm = (bbox_preds['dir_res_norm' + suffix] *\n                        heading_label_one_hot).sum(dim=-1)\n        dir_res_loss = self.dir_res_loss(\n            dir_res_norm, dir_res_targets, weight=box_loss_weights)\n\n        # calculate size class loss\n        size_class_loss = self.size_class_loss(\n            bbox_preds['size_class' + suffix].transpose(2, 1),\n            size_class_targets,\n            weight=box_loss_weights)\n\n        # calculate size residual loss\n        one_hot_size_targets = box_loss_weights.new_zeros(\n            (batch_size, proposal_num, self.num_sizes))\n        one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(\n            -1).repeat(1, 1, 1, 3)\n        size_residual_norm = (bbox_preds['size_res_norm' + suffix] *\n                              one_hot_size_targets_expand).sum(dim=2)\n        box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(\n            1, 1, 3)\n        size_res_loss = self.size_res_loss(\n            size_residual_norm,\n            size_res_targets,\n            weight=box_loss_weights_expand)\n\n        # calculate semantic loss\n        semantic_loss = self.semantic_loss(\n            bbox_preds['sem_scores' + suffix].transpose(2, 1),\n            mask_targets,\n            weight=box_loss_weights)\n\n        losses = dict(\n            objectness_loss=objectness_loss,\n            semantic_loss=semantic_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            
dir_res_loss=dir_res_loss,\n            size_class_loss=size_class_loss,\n            size_res_loss=size_res_loss)\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of proposal module.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of vote head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of proposal module.\n        \"\"\"\n        # find empty example\n        valid_gt_masks = list()\n        gt_num = list()\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n                valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))\n                gt_num.append(1)\n            else:\n                valid_gt_masks.append(gt_labels_3d[index].new_ones(\n                    gt_labels_3d[index].shape))\n                gt_num.append(gt_labels_3d[index].shape[0])\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        aggregated_points = [\n            bbox_preds['aggregated_points'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        surface_center_pred = [\n            bbox_preds['surface_center_pred'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        line_center_pred = [\n            bbox_preds['pred_line_center'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        surface_center_object = [\n            bbox_preds['surface_center_object'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        line_center_object = [\n            bbox_preds['line_center_object'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        surface_sem_pred = [\n            bbox_preds['surface_sem_pred'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        line_sem_pred = [\n            bbox_preds['sem_cls_scores_line'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        (cues_objectness_label, cues_sem_label, proposal_objectness_label,\n         cues_mask, cues_match_mask, proposal_objectness_mask,\n         cues_matching_label, obj_surface_line_center) = multi_apply(\n             self.get_targets_single, points, gt_bboxes_3d, gt_labels_3d,\n             pts_semantic_mask, pts_instance_mask, aggregated_points,\n             surface_center_pred, line_center_pred, surface_center_object,\n             line_center_object, surface_sem_pred, line_sem_pred)\n\n  
      cues_objectness_label = torch.stack(cues_objectness_label)\n        cues_sem_label = torch.stack(cues_sem_label)\n        proposal_objectness_label = torch.stack(proposal_objectness_label)\n        cues_mask = torch.stack(cues_mask)\n        cues_match_mask = torch.stack(cues_match_mask)\n        proposal_objectness_mask = torch.stack(proposal_objectness_mask)\n        cues_matching_label = torch.stack(cues_matching_label)\n        obj_surface_line_center = torch.stack(obj_surface_line_center)\n\n        return (cues_objectness_label, cues_sem_label,\n                proposal_objectness_label, cues_mask, cues_match_mask,\n                proposal_objectness_mask, cues_matching_label,\n                obj_surface_line_center)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           aggregated_points=None,\n                           pred_surface_center=None,\n                           pred_line_center=None,\n                           pred_obj_surface_center=None,\n                           pred_obj_line_center=None,\n                           pred_surface_sem=None,\n                           pred_line_sem=None):\n        \"\"\"Generate targets for primitive cues for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (torch.Tensor): Point-wise instance\n                label of each batch.\n            aggregated_points (torch.Tensor): Aggregated points from\n                vote aggregation layer.\n            pred_surface_center (torch.Tensor): Prediction of surface center.\n            pred_line_center (torch.Tensor): Prediction of line center.\n            pred_obj_surface_center (torch.Tensor): Objectness prediction\n                of surface center.\n            pred_obj_line_center (torch.Tensor): Objectness prediction of\n                line center.\n            pred_surface_sem (torch.Tensor): Semantic prediction of\n                surface center.\n            pred_line_sem (torch.Tensor): Semantic prediction of line center.\n        Returns:\n            tuple[torch.Tensor]: Targets for primitive cues.\n        \"\"\"\n        device = points.device\n        gt_bboxes_3d = gt_bboxes_3d.to(device)\n        num_proposals = aggregated_points.shape[0]\n        gt_center = gt_bboxes_3d.gravity_center\n\n        dist1, dist2, ind1, _ = chamfer_distance(\n            aggregated_points.unsqueeze(0),\n            gt_center.unsqueeze(0),\n            reduction='none')\n        # Set assignment\n        object_assignment = ind1.squeeze(0)\n\n        # Generate objectness label and mask\n        # objectness_label: 1 if pred object center is within\n        # self.train_cfg['near_threshold'] of any GT object\n        # objectness_mask: 0 if pred object center is in gray\n        # zone (DONOTCARE), 1 otherwise\n        euclidean_dist1 = torch.sqrt(dist1.squeeze(0) + 1e-6)\n        proposal_objectness_label = euclidean_dist1.new_zeros(\n            num_proposals, dtype=torch.long)\n        proposal_objectness_mask = 
euclidean_dist1.new_zeros(num_proposals)\n\n        gt_sem = gt_labels_3d[object_assignment]\n\n        obj_surface_center, obj_line_center = \\\n            gt_bboxes_3d.get_surface_line_center()\n        obj_surface_center = obj_surface_center.reshape(-1, 6,\n                                                        3).transpose(0, 1)\n        obj_line_center = obj_line_center.reshape(-1, 12, 3).transpose(0, 1)\n        obj_surface_center = obj_surface_center[:, object_assignment].reshape(\n            1, -1, 3)\n        obj_line_center = obj_line_center[:,\n                                          object_assignment].reshape(1, -1, 3)\n\n        surface_sem = torch.argmax(pred_surface_sem, dim=1).float()\n        line_sem = torch.argmax(pred_line_sem, dim=1).float()\n\n        dist_surface, _, surface_ind, _ = chamfer_distance(\n            obj_surface_center,\n            pred_surface_center.unsqueeze(0),\n            reduction='none')\n        dist_line, _, line_ind, _ = chamfer_distance(\n            obj_line_center, pred_line_center.unsqueeze(0), reduction='none')\n\n        surface_sel = pred_surface_center[surface_ind.squeeze(0)]\n        line_sel = pred_line_center[line_ind.squeeze(0)]\n        surface_sel_sem = surface_sem[surface_ind.squeeze(0)]\n        line_sel_sem = line_sem[line_ind.squeeze(0)]\n\n        surface_sel_sem_gt = gt_sem.repeat(6).float()\n        line_sel_sem_gt = gt_sem.repeat(12).float()\n\n        euclidean_dist_surface = torch.sqrt(dist_surface.squeeze(0) + 1e-6)\n        euclidean_dist_line = torch.sqrt(dist_line.squeeze(0) + 1e-6)\n        objectness_label_surface = euclidean_dist_line.new_zeros(\n            num_proposals * 6, dtype=torch.long)\n        objectness_mask_surface = euclidean_dist_line.new_zeros(num_proposals *\n                                                                6)\n        objectness_label_line = euclidean_dist_line.new_zeros(\n            num_proposals * 12, dtype=torch.long)\n        objectness_mask_line = euclidean_dist_line.new_zeros(num_proposals *\n                                                             12)\n        objectness_label_surface_sem = euclidean_dist_line.new_zeros(\n            num_proposals * 6, dtype=torch.long)\n        objectness_label_line_sem = euclidean_dist_line.new_zeros(\n            num_proposals * 12, dtype=torch.long)\n\n        euclidean_dist_obj_surface = torch.sqrt((\n            (pred_obj_surface_center - surface_sel)**2).sum(dim=-1) + 1e-6)\n        euclidean_dist_obj_line = torch.sqrt(\n            torch.sum((pred_obj_line_center - line_sel)**2, dim=-1) + 1e-6)\n\n        # Objectness score just with centers\n        proposal_objectness_label[\n            euclidean_dist1 < self.train_cfg['near_threshold']] = 1\n        proposal_objectness_mask[\n            euclidean_dist1 < self.train_cfg['near_threshold']] = 1\n        proposal_objectness_mask[\n            euclidean_dist1 > self.train_cfg['far_threshold']] = 1\n\n        objectness_label_surface[\n            (euclidean_dist_obj_surface <\n             self.train_cfg['label_surface_threshold']) *\n            (euclidean_dist_surface <\n             self.train_cfg['mask_surface_threshold'])] = 1\n        objectness_label_surface_sem[\n            (euclidean_dist_obj_surface <\n             self.train_cfg['label_surface_threshold']) *\n            (euclidean_dist_surface < self.train_cfg['mask_surface_threshold'])\n            * (surface_sel_sem == surface_sel_sem_gt)] = 1\n\n        objectness_label_line[\n            
(euclidean_dist_obj_line < self.train_cfg['label_line_threshold'])\n            *\n            (euclidean_dist_line < self.train_cfg['mask_line_threshold'])] = 1\n        objectness_label_line_sem[\n            (euclidean_dist_obj_line < self.train_cfg['label_line_threshold'])\n            * (euclidean_dist_line < self.train_cfg['mask_line_threshold']) *\n            (line_sel_sem == line_sel_sem_gt)] = 1\n\n        objectness_label_surface_obj = proposal_objectness_label.repeat(6)\n        objectness_mask_surface_obj = proposal_objectness_mask.repeat(6)\n        objectness_label_line_obj = proposal_objectness_label.repeat(12)\n        objectness_mask_line_obj = proposal_objectness_mask.repeat(12)\n\n        objectness_mask_surface = objectness_mask_surface_obj\n        objectness_mask_line = objectness_mask_line_obj\n\n        cues_objectness_label = torch.cat(\n            (objectness_label_surface, objectness_label_line), 0)\n        cues_sem_label = torch.cat(\n            (objectness_label_surface_sem, objectness_label_line_sem), 0)\n        cues_mask = torch.cat((objectness_mask_surface, objectness_mask_line),\n                              0)\n\n        objectness_label_surface *= objectness_label_surface_obj\n        objectness_label_line *= objectness_label_line_obj\n        cues_matching_label = torch.cat(\n            (objectness_label_surface, objectness_label_line), 0)\n\n        objectness_label_surface_sem *= objectness_label_surface_obj\n        objectness_label_line_sem *= objectness_label_line_obj\n\n        cues_match_mask = (torch.sum(\n            cues_objectness_label.view(18, num_proposals), dim=0) >=\n                           1).float()\n\n        obj_surface_line_center = torch.cat(\n            (obj_surface_center, obj_line_center), 1).squeeze(0)\n\n        return (cues_objectness_label, cues_sem_label,\n                proposal_objectness_label, cues_mask, cues_match_mask,\n                proposal_objectness_mask, cues_matching_label,\n                obj_surface_line_center)\n"
  },
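The `get_proposal_stage_loss` method above computes the direction and size residual losses only on the residual predicted for the ground-truth bin: it builds a one-hot mask over bins with `scatter_`, multiplies it with the prediction, and sums over the bin dimension. A minimal sketch of that selection pattern with toy shapes (the variable names and sizes below are illustrative, not taken from the repo), plus its `torch.gather` equivalent:

```python
import torch

# Toy shapes: 2 samples, 4 proposals, 12 direction bins (illustrative only).
batch_size, num_proposals, num_dir_bins = 2, 4, 12

dir_res_norm_pred = torch.randn(batch_size, num_proposals, num_dir_bins)
dir_class_targets = torch.randint(0, num_dir_bins, (batch_size, num_proposals))

# One-hot selection as in get_proposal_stage_loss: mark the ground-truth bin
# of each proposal, then keep only the residual predicted for that bin.
one_hot = dir_res_norm_pred.new_zeros(
    (batch_size, num_proposals, num_dir_bins))
one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)
selected_via_one_hot = (dir_res_norm_pred * one_hot).sum(dim=-1)

# Equivalent, more direct form using gather.
selected_via_gather = dir_res_norm_pred.gather(
    2, dir_class_targets.unsqueeze(-1)).squeeze(-1)

assert torch.allclose(selected_via_one_hot, selected_via_gather)
```

The size-residual branch reuses the same one-hot trick, except the mask is expanded with a trailing (x, y, z) dimension before the sum, so each selected residual keeps its three size components.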
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule, normal_init\n\nfrom mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE\n\nif IS_SPCONV2_AVAILABLE:\n    from spconv.pytorch import (SparseConvTensor, SparseMaxPool3d,\n                                SparseSequential)\nelse:\n    from mmcv.ops import SparseConvTensor, SparseMaxPool3d, SparseSequential\n\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,\n                                          rotation_3d_in_axis, xywhr2xyxyr)\nfrom mmdet3d.core.post_processing import nms_bev, nms_normal_bev\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet3d.ops import make_sparse_convmodule\nfrom mmdet.core import build_bbox_coder, multi_apply\n\n\n@HEADS.register_module()\nclass PartA2BboxHead(BaseModule):\n    \"\"\"PartA2 RoI head.\n\n    Args:\n        num_classes (int): The number of classes to prediction.\n        seg_in_channels (int): Input channels of segmentation\n            convolution layer.\n        part_in_channels (int): Input channels of part convolution layer.\n        seg_conv_channels (list(int)): Out channels of each\n            segmentation convolution layer.\n        part_conv_channels (list(int)): Out channels of each\n            part convolution layer.\n        merge_conv_channels (list(int)): Out channels of each\n            feature merged convolution layer.\n        down_conv_channels (list(int)): Out channels of each\n            downsampled convolution layer.\n        shared_fc_channels (list(int)): Out channels of each shared fc layer.\n        cls_channels (list(int)): Out channels of each classification layer.\n        reg_channels (list(int)): Out channels of each regression layer.\n        dropout_ratio (float): Dropout ratio of classification and\n            regression layers.\n        roi_feat_size (int): The size of pooled roi features.\n        with_corner_loss (bool): Whether to use corner loss or not.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for box head.\n        conv_cfg (dict): Config dict of convolutional layers\n        norm_cfg (dict): Config dict of normalization layers\n        loss_bbox (dict): Config dict of box regression loss.\n        loss_cls (dict): Config dict of classifacation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 seg_in_channels,\n                 part_in_channels,\n                 seg_conv_channels=None,\n                 part_conv_channels=None,\n                 merge_conv_channels=None,\n                 down_conv_channels=None,\n                 shared_fc_channels=None,\n                 cls_channels=None,\n                 reg_channels=None,\n                 dropout_ratio=0.1,\n                 roi_feat_size=14,\n                 with_corner_loss=True,\n                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     reduction='none',\n                     loss_weight=1.0),\n                 init_cfg=None):\n        super(PartA2BboxHead, self).__init__(init_cfg=init_cfg)\n        
self.num_classes = num_classes\n        self.with_corner_loss = with_corner_loss\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_cls = build_loss(loss_cls)\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n\n        assert down_conv_channels[-1] == shared_fc_channels[0]\n\n        # init layers\n        part_channel_last = part_in_channels\n        part_conv = []\n        for i, channel in enumerate(part_conv_channels):\n            part_conv.append(\n                make_sparse_convmodule(\n                    part_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key=f'rcnn_part{i}',\n                    conv_type='SubMConv3d'))\n            part_channel_last = channel\n        self.part_conv = SparseSequential(*part_conv)\n\n        seg_channel_last = seg_in_channels\n        seg_conv = []\n        for i, channel in enumerate(seg_conv_channels):\n            seg_conv.append(\n                make_sparse_convmodule(\n                    seg_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key=f'rcnn_seg{i}',\n                    conv_type='SubMConv3d'))\n            seg_channel_last = channel\n        self.seg_conv = SparseSequential(*seg_conv)\n\n        self.conv_down = SparseSequential()\n\n        merge_conv_channel_last = part_channel_last + seg_channel_last\n        merge_conv = []\n        for i, channel in enumerate(merge_conv_channels):\n            merge_conv.append(\n                make_sparse_convmodule(\n                    merge_conv_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key='rcnn_down0'))\n            merge_conv_channel_last = channel\n\n        down_conv_channel_last = merge_conv_channel_last\n        conv_down = []\n        for i, channel in enumerate(down_conv_channels):\n            conv_down.append(\n                make_sparse_convmodule(\n                    down_conv_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key='rcnn_down1'))\n            down_conv_channel_last = channel\n\n        self.conv_down.add_module('merge_conv', SparseSequential(*merge_conv))\n        self.conv_down.add_module('max_pool3d',\n                                  SparseMaxPool3d(kernel_size=2, stride=2))\n        self.conv_down.add_module('down_conv', SparseSequential(*conv_down))\n\n        shared_fc_list = []\n        pool_size = roi_feat_size // 2\n        pre_channel = shared_fc_channels[0] * pool_size**3\n        for k in range(1, len(shared_fc_channels)):\n            shared_fc_list.append(\n                ConvModule(\n                    pre_channel,\n                    shared_fc_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    inplace=True))\n            pre_channel = shared_fc_channels[k]\n\n            if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:\n                shared_fc_list.append(nn.Dropout(dropout_ratio))\n\n        self.shared_fc = nn.Sequential(*shared_fc_list)\n\n  
      # Classification layer\n        channel_in = shared_fc_channels[-1]\n        cls_channel = 1\n        cls_layers = []\n        pre_channel = channel_in\n        for k in range(0, len(cls_channels)):\n            cls_layers.append(\n                ConvModule(\n                    pre_channel,\n                    cls_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    inplace=True))\n            pre_channel = cls_channels[k]\n        cls_layers.append(\n            ConvModule(\n                pre_channel,\n                cls_channel,\n                1,\n                padding=0,\n                conv_cfg=conv_cfg,\n                act_cfg=None))\n        if dropout_ratio >= 0:\n            cls_layers.insert(1, nn.Dropout(dropout_ratio))\n\n        self.conv_cls = nn.Sequential(*cls_layers)\n\n        # Regression layer\n        reg_layers = []\n        pre_channel = channel_in\n        for k in range(0, len(reg_channels)):\n            reg_layers.append(\n                ConvModule(\n                    pre_channel,\n                    reg_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    inplace=True))\n            pre_channel = reg_channels[k]\n        reg_layers.append(\n            ConvModule(\n                pre_channel,\n                self.bbox_coder.code_size,\n                1,\n                padding=0,\n                conv_cfg=conv_cfg,\n                act_cfg=None))\n        if dropout_ratio >= 0:\n            reg_layers.insert(1, nn.Dropout(dropout_ratio))\n\n        self.conv_reg = nn.Sequential(*reg_layers)\n\n        if init_cfg is None:\n            self.init_cfg = dict(\n                type='Xavier',\n                layer=['Conv2d', 'Conv1d'],\n                distribution='uniform')\n\n    def init_weights(self):\n        super().init_weights()\n        normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)\n\n    def forward(self, seg_feats, part_feats):\n        \"\"\"Forward pass.\n\n        Args:\n            seg_feats (torch.Tensor): Point-wise semantic features.\n            part_feats (torch.Tensor): Point-wise part prediction features.\n\n        Returns:\n            tuple[torch.Tensor]: Score of class and bbox predictions.\n        \"\"\"\n        # (B * N, out_x, out_y, out_z, 4)\n        rcnn_batch_size = part_feats.shape[0]\n\n        # transform to sparse tensors\n        sparse_shape = part_feats.shape[1:4]\n        # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx]\n        sparse_idx = part_feats.sum(dim=-1).nonzero(as_tuple=False)\n\n        part_features = part_feats[sparse_idx[:, 0], sparse_idx[:, 1],\n                                   sparse_idx[:, 2], sparse_idx[:, 3]]\n        seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1],\n                                 sparse_idx[:, 2], sparse_idx[:, 3]]\n        coords = sparse_idx.int().contiguous()\n        part_features = SparseConvTensor(part_features, coords, sparse_shape,\n                                         rcnn_batch_size)\n        seg_features = SparseConvTensor(seg_features, coords, sparse_shape,\n                                        rcnn_batch_size)\n\n        # forward rcnn network\n        x_part = self.part_conv(part_features)\n        x_rpn = self.seg_conv(seg_features)\n\n        merged_feature = 
torch.cat((x_rpn.features, x_part.features),\n                                   dim=1)  # (N, C)\n        shared_feature = SparseConvTensor(merged_feature, coords, sparse_shape,\n                                          rcnn_batch_size)\n\n        x = self.conv_down(shared_feature)\n\n        shared_feature = x.dense().view(rcnn_batch_size, -1, 1)\n\n        shared_feature = self.shared_fc(shared_feature)\n\n        cls_score = self.conv_cls(shared_feature).transpose(\n            1, 2).contiguous().squeeze(dim=1)  # (B, 1)\n        bbox_pred = self.conv_reg(shared_feature).transpose(\n            1, 2).contiguous().squeeze(dim=1)  # (B, C)\n\n        return cls_score, bbox_pred\n\n    def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,\n             pos_gt_bboxes, reg_mask, label_weights, bbox_weights):\n        \"\"\"Computing losses.\n\n        Args:\n            cls_score (torch.Tensor): Scores of each roi.\n            bbox_pred (torch.Tensor): Predictions of bboxes.\n            rois (torch.Tensor): Roi bboxes.\n            labels (torch.Tensor): Labels of class.\n            bbox_targets (torch.Tensor): Target of positive bboxes.\n            pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.\n            reg_mask (torch.Tensor): Mask for positive bboxes.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_weights (torch.Tensor): Weights of bbox loss.\n\n        Returns:\n            dict: Computed losses.\n\n                - loss_cls (torch.Tensor): Loss of classes.\n                - loss_bbox (torch.Tensor): Loss of bboxes.\n                - loss_corner (torch.Tensor): Loss of corners.\n        \"\"\"\n        losses = dict()\n        rcnn_batch_size = cls_score.shape[0]\n\n        # calculate class loss\n        cls_flat = cls_score.view(-1)\n        loss_cls = self.loss_cls(cls_flat, labels, label_weights)\n        losses['loss_cls'] = loss_cls\n\n        # calculate regression loss\n        code_size = self.bbox_coder.code_size\n        pos_inds = (reg_mask > 0)\n        if pos_inds.any() == 0:\n            # fake a part loss\n            losses['loss_bbox'] = loss_cls.new_tensor(0)\n            if self.with_corner_loss:\n                losses['loss_corner'] = loss_cls.new_tensor(0)\n        else:\n            pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]\n            bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(\n                1, pos_bbox_pred.shape[-1])\n            loss_bbox = self.loss_bbox(\n                pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0),\n                bbox_weights_flat.unsqueeze(dim=0))\n            losses['loss_bbox'] = loss_bbox\n\n            if self.with_corner_loss:\n                pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]\n                pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)\n                batch_anchors = pos_roi_boxes3d.clone().detach()\n                pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)\n                roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)\n                batch_anchors[..., 0:3] = 0\n                # decode boxes\n                pred_boxes3d = self.bbox_coder.decode(\n                    batch_anchors,\n                    pos_bbox_pred.view(-1, code_size)).view(-1, code_size)\n\n                pred_boxes3d[..., 0:3] = rotation_3d_in_axis(\n                    pred_boxes3d[..., 0:3].unsqueeze(1),\n                    pos_rois_rotation,\n                    
axis=2).squeeze(1)\n\n                pred_boxes3d[:, 0:3] += roi_xyz\n\n                # calculate corner loss\n                loss_corner = self.get_corner_loss_lidar(\n                    pred_boxes3d, pos_gt_bboxes)\n                losses['loss_corner'] = loss_corner\n\n        return losses\n\n    def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):\n        \"\"\"Generate targets.\n\n        Args:\n            sampling_results (list[:obj:`SamplingResult`]):\n                Sampled results from rois.\n            rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.\n            concat (bool): Whether to concatenate targets between batches.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of boxes and class prediction.\n        \"\"\"\n        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]\n        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]\n        iou_list = [res.iou for res in sampling_results]\n        targets = multi_apply(\n            self._get_target_single,\n            pos_bboxes_list,\n            pos_gt_bboxes_list,\n            iou_list,\n            cfg=rcnn_train_cfg)\n\n        (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n         bbox_weights) = targets\n\n        if concat:\n            label = torch.cat(label, 0)\n            bbox_targets = torch.cat(bbox_targets, 0)\n            pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)\n            reg_mask = torch.cat(reg_mask, 0)\n\n            label_weights = torch.cat(label_weights, 0)\n            label_weights /= torch.clamp(label_weights.sum(), min=1.0)\n\n            bbox_weights = torch.cat(bbox_weights, 0)\n            bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)\n\n        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n\n    def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            pos_bboxes (torch.Tensor): Positive boxes with shape\n                (N, 7).\n            pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape\n                (M, 7).\n            ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`\n                in shape (N, M).\n            cfg (dict): Training configs.\n\n        Returns:\n            tuple[torch.Tensor]: Target for positive boxes.\n                (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n        \"\"\"\n        cls_pos_mask = ious > cfg.cls_pos_thr\n        cls_neg_mask = ious < cfg.cls_neg_thr\n        interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)\n\n        # iou regression target\n        label = (cls_pos_mask > 0).float()\n        label[interval_mask] = ious[interval_mask] * 2 - 0.5\n        # label weights\n        label_weights = (label >= 0).float()\n\n        # box regression target\n        reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()\n        reg_mask[0:pos_gt_bboxes.size(0)] = 1\n        bbox_weights = (reg_mask > 0).float()\n        if reg_mask.bool().any():\n            pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()\n            roi_center = pos_bboxes[..., 0:3]\n            roi_ry = pos_bboxes[..., 6] % (2 * np.pi)\n\n            # canonical transformation\n            pos_gt_bboxes_ct[..., 0:3] -= roi_center\n            pos_gt_bboxes_ct[..., 6] -= roi_ry\n            pos_gt_bboxes_ct[..., 0:3] = 
rotation_3d_in_axis(\n                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -roi_ry,\n                axis=2).squeeze(1)\n\n            # flip orientation if rois have opposite orientation\n            ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi\n            opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)\n            ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (\n                2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)\n            flag = ry_label > np.pi\n            ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)\n            ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)\n            pos_gt_bboxes_ct[..., 6] = ry_label\n\n            rois_anchor = pos_bboxes.clone().detach()\n            rois_anchor[:, 0:3] = 0\n            rois_anchor[:, 6] = 0\n            bbox_targets = self.bbox_coder.encode(rois_anchor,\n                                                  pos_gt_bboxes_ct)\n        else:\n            # no fg bbox\n            bbox_targets = pos_gt_bboxes.new_empty((0, 7))\n\n        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n\n    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):\n        \"\"\"Calculate corner loss of given boxes.\n\n        Args:\n            pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).\n            gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).\n            delta (float, optional): huber loss threshold. Defaults to 1.0\n\n        Returns:\n            torch.FloatTensor: Calculated corner loss in shape (N).\n        \"\"\"\n        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]\n\n        # This is a little bit hack here because we assume the box for\n        # Part-A2 is in LiDAR coordinates\n        gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)\n        pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners\n        gt_box_corners = gt_boxes_structure.corners\n\n        # This flip only changes the heading direction of GT boxes\n        gt_bbox3d_flip = gt_boxes_structure.clone()\n        gt_bbox3d_flip.tensor[:, 6] += np.pi\n        gt_box_corners_flip = gt_bbox3d_flip.corners\n\n        corner_dist = torch.min(\n            torch.norm(pred_box_corners - gt_box_corners, dim=2),\n            torch.norm(pred_box_corners - gt_box_corners_flip,\n                       dim=2))  # (N, 8)\n        # huber loss\n        abs_error = corner_dist.abs()\n        quadratic = abs_error.clamp(max=delta)\n        linear = (abs_error - quadratic)\n        corner_loss = 0.5 * quadratic**2 + delta * linear\n\n        return corner_loss.mean(dim=1)\n\n    def get_bboxes(self,\n                   rois,\n                   cls_score,\n                   bbox_pred,\n                   class_labels,\n                   class_pred,\n                   img_metas,\n                   cfg=None):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            rois (torch.Tensor): Roi bounding boxes.\n            cls_score (torch.Tensor): Scores of bounding boxes.\n            bbox_pred (torch.Tensor): Bounding boxes predictions\n            class_labels (torch.Tensor): Label of classes\n            class_pred (torch.Tensor): Score for nms.\n            img_metas (list[dict]): Point cloud and image's meta info.\n            cfg (:obj:`ConfigDict`): Testing config.\n\n        Returns:\n            list[tuple]: Decoded bbox, scores and labels after 
nms.\n        \"\"\"\n        roi_batch_id = rois[..., 0]\n        roi_boxes = rois[..., 1:]  # boxes without batch id\n        batch_size = int(roi_batch_id.max().item() + 1)\n\n        # decode boxes\n        roi_ry = roi_boxes[..., 6].view(-1)\n        roi_xyz = roi_boxes[..., 0:3].view(-1, 3)\n        local_roi_boxes = roi_boxes.clone().detach()\n        local_roi_boxes[..., 0:3] = 0\n        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)\n        rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(\n            rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)\n        rcnn_boxes3d[:, 0:3] += roi_xyz\n\n        # post processing\n        result_list = []\n        for batch_id in range(batch_size):\n            cur_class_labels = class_labels[batch_id]\n            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)\n\n            cur_box_prob = class_pred[batch_id]\n            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]\n            keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,\n                                        cfg.score_thr, cfg.nms_thr,\n                                        img_metas[batch_id],\n                                        cfg.use_rotate_nms)\n            selected_bboxes = cur_rcnn_boxes3d[keep]\n            selected_label_preds = cur_class_labels[keep]\n            selected_scores = cur_cls_score[keep]\n\n            result_list.append(\n                (img_metas[batch_id]['box_type_3d'](selected_bboxes,\n                                                    self.bbox_coder.code_size),\n                 selected_scores, selected_label_preds))\n        return result_list\n\n    def multi_class_nms(self,\n                        box_probs,\n                        box_preds,\n                        score_thr,\n                        nms_thr,\n                        input_meta,\n                        use_rotate_nms=True):\n        \"\"\"Multi-class NMS for box head.\n\n        Note:\n            This function has large overlap with the `box3d_multiclass_nms`\n            implemented in `mmdet3d.core.post_processing`. 
We are considering\n            merging these two functions in the future.\n\n        Args:\n            box_probs (torch.Tensor): Predicted boxes probabitilies in\n                shape (N,).\n            box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).\n            score_thr (float): Threshold of scores.\n            nms_thr (float): Threshold for NMS.\n            input_meta (dict): Meta information of the current sample.\n            use_rotate_nms (bool, optional): Whether to use rotated nms.\n                Defaults to True.\n\n        Returns:\n            torch.Tensor: Selected indices.\n        \"\"\"\n        if use_rotate_nms:\n            nms_func = nms_bev\n        else:\n            nms_func = nms_normal_bev\n\n        assert box_probs.shape[\n            1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'\n        selected_list = []\n        selected_labels = []\n        boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            box_preds, self.bbox_coder.code_size).bev)\n\n        score_thresh = score_thr if isinstance(\n            score_thr, list) else [score_thr for x in range(self.num_classes)]\n        nms_thresh = nms_thr if isinstance(\n            nms_thr, list) else [nms_thr for x in range(self.num_classes)]\n        for k in range(0, self.num_classes):\n            class_scores_keep = box_probs[:, k] >= score_thresh[k]\n\n            if class_scores_keep.int().sum() > 0:\n                original_idxs = class_scores_keep.nonzero(\n                    as_tuple=False).view(-1)\n                cur_boxes_for_nms = boxes_for_nms[class_scores_keep]\n                cur_rank_scores = box_probs[class_scores_keep, k]\n\n                cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,\n                                        nms_thresh[k])\n\n                if cur_selected.shape[0] == 0:\n                    continue\n                selected_list.append(original_idxs[cur_selected])\n                selected_labels.append(\n                    torch.full([cur_selected.shape[0]],\n                               k + 1,\n                               dtype=torch.int64,\n                               device=box_preds.device))\n\n        keep = torch.cat(\n            selected_list, dim=0) if len(selected_list) > 0 else []\n        return keep\n"
  },
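`PartA2BboxHead.get_corner_loss_lidar` above (and the identical helper in the PointRCNN head that follows) supervises box regression with a corner loss: the distance between predicted and ground-truth corners is taken as the minimum over the original and heading-flipped ground-truth box, so a 180-degree heading ambiguity is not penalized, and the result is passed through a Huber-style penalty. A dependency-free sketch of just the penalty applied to precomputed corner distances (the helper name and toy inputs are illustrative):

```python
import torch


def huber_corner_loss(corner_dist, delta=1.0):
    """Huber penalty over per-corner distances, as in get_corner_loss_lidar.

    corner_dist: (N, 8) distances between predicted and GT box corners
        (already reduced over the heading-flipped GT).
    Returns an (N,) loss averaged over the 8 corners of each box.
    """
    abs_error = corner_dist.abs()
    quadratic = abs_error.clamp(max=delta)  # quadratic region, |e| <= delta
    linear = abs_error - quadratic          # linear tail beyond delta
    return (0.5 * quadratic ** 2 + delta * linear).mean(dim=1)


# Toy check with delta=1.0: an error of 0.5 stays quadratic (0.125 per
# corner), while an error of 3.0 falls in the linear tail (2.5 per corner).
dist = torch.tensor([[0.5] * 8, [3.0] * 8])
print(huber_corner_loss(dist))  # tensor([0.1250, 2.5000])
```

Keeping the tail linear bounds the gradient for badly localized boxes, which is the usual motivation for a Huber-style corner penalty over a plain L2 one.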
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/point_rcnn_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule, normal_init\nfrom mmcv.cnn.bricks import build_conv_layer\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\nfrom mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,\n                                          rotation_3d_in_axis, xywhr2xyxyr)\nfrom mmdet3d.core.post_processing import nms_bev, nms_normal_bev\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet3d.ops import build_sa_module\nfrom mmdet.core import build_bbox_coder, multi_apply\n\n\n@HEADS.register_module()\nclass PointRCNNBboxHead(BaseModule):\n    \"\"\"PointRCNN RoI Bbox head.\n\n    Args:\n        num_classes (int): The number of classes to prediction.\n        in_channels (int)： Input channels of point features.\n        mlp_channels (list[int]): the number of mlp channels\n        pred_layer_cfg (dict, optional): Config of classfication and\n            regression prediction layers. Defaults to None.\n        num_points (tuple, optional): The number of points which each SA\n            module samples. Defaults to (128, 32, -1).\n        radius (tuple, optional): Sampling radius of each SA module.\n            Defaults to (0.2, 0.4, 100).\n        num_samples (tuple, optional): The number of samples for ball query\n            in each SA module. Defaults to (64, 64, 64).\n        sa_channels (tuple, optional): Out channels of each mlp in SA module.\n            Defaults to ((128, 128, 128), (128, 128, 256), (256, 256, 512)).\n        bbox_coder (dict, optional): Config dict of box coders.\n            Defaults to dict(type='DeltaXYZWLHRBBoxCoder').\n        sa_cfg (dict, optional): Config of set abstraction module, which may\n            contain the following keys and values:\n\n            - pool_mod (str): Pool method ('max' or 'avg') for SA modules.\n            - use_xyz (bool): Whether to use xyz as a part of features.\n            - normalize_xyz (bool): Whether to normalize xyz with radii in\n              each SA module.\n            Defaults to dict(type='PointSAModule', pool_mod='max',\n                use_xyz=True).\n        conv_cfg (dict, optional): Config dict of convolutional layers.\n             Defaults to dict(type='Conv1d').\n        norm_cfg (dict, optional): Config dict of normalization layers.\n             Defaults to dict(type='BN1d').\n        act_cfg (dict, optional): Config dict of activation layers.\n            Defaults to dict(type='ReLU').\n        bias (str, optional): Type of bias. Defaults to 'auto'.\n        loss_bbox (dict, optional): Config of regression loss function.\n            Defaults to dict(type='SmoothL1Loss', beta=1.0 / 9.0,\n                reduction='sum', loss_weight=1.0).\n        loss_cls (dict, optional): Config of classification loss function.\n             Defaults to dict(type='CrossEntropyLoss', use_sigmoid=True,\n                reduction='sum', loss_weight=1.0).\n        with_corner_loss (bool, optional): Whether using corner loss.\n            Defaults to True.\n        init_cfg (dict, optional): Config of initialization. 
Defaults to None.\n    \"\"\"\n\n    def __init__(\n            self,\n            num_classes,\n            in_channels,\n            mlp_channels,\n            pred_layer_cfg=None,\n            num_points=(128, 32, -1),\n            radius=(0.2, 0.4, 100),\n            num_samples=(64, 64, 64),\n            sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),\n            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n            sa_cfg=dict(type='PointSAModule', pool_mod='max', use_xyz=True),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            act_cfg=dict(type='ReLU'),\n            bias='auto',\n            loss_bbox=dict(\n                type='SmoothL1Loss',\n                beta=1.0 / 9.0,\n                reduction='sum',\n                loss_weight=1.0),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                loss_weight=1.0),\n            with_corner_loss=True,\n            init_cfg=None):\n        super(PointRCNNBboxHead, self).__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.num_sa = len(sa_channels)\n        self.with_corner_loss = with_corner_loss\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.bias = bias\n\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_cls = build_loss(loss_cls)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n\n        self.in_channels = in_channels\n        mlp_channels = [self.in_channels] + mlp_channels\n        shared_mlps = nn.Sequential()\n        for i in range(len(mlp_channels) - 1):\n            shared_mlps.add_module(\n                f'layer{i}',\n                ConvModule(\n                    mlp_channels[i],\n                    mlp_channels[i + 1],\n                    kernel_size=(1, 1),\n                    stride=(1, 1),\n                    inplace=False,\n                    conv_cfg=dict(type='Conv2d')))\n        self.xyz_up_layer = nn.Sequential(*shared_mlps)\n\n        c_out = mlp_channels[-1]\n        self.merge_down_layer = ConvModule(\n            c_out * 2,\n            c_out,\n            kernel_size=(1, 1),\n            stride=(1, 1),\n            inplace=False,\n            conv_cfg=dict(type='Conv2d'))\n\n        pre_channels = c_out\n\n        self.SA_modules = nn.ModuleList()\n        sa_in_channel = pre_channels\n\n        for sa_index in range(self.num_sa):\n            cur_sa_mlps = list(sa_channels[sa_index])\n            cur_sa_mlps = [sa_in_channel] + cur_sa_mlps\n            sa_out_channel = cur_sa_mlps[-1]\n\n            cur_num_points = num_points[sa_index]\n            if cur_num_points <= 0:\n                cur_num_points = None\n            self.SA_modules.append(\n                build_sa_module(\n                    num_point=cur_num_points,\n                    radius=radius[sa_index],\n                    num_sample=num_samples[sa_index],\n                    mlp_channels=cur_sa_mlps,\n                    cfg=sa_cfg))\n            sa_in_channel = sa_out_channel\n        self.cls_convs = self._add_conv_branch(\n            pred_layer_cfg.in_channels, pred_layer_cfg.cls_conv_channels)\n        self.reg_convs = self._add_conv_branch(\n            pred_layer_cfg.in_channels, pred_layer_cfg.reg_conv_channels)\n\n        prev_channel = 
pred_layer_cfg.cls_conv_channels[-1]\n        self.conv_cls = build_conv_layer(\n            self.conv_cfg,\n            in_channels=prev_channel,\n            out_channels=self.num_classes,\n            kernel_size=1)\n        prev_channel = pred_layer_cfg.reg_conv_channels[-1]\n        self.conv_reg = build_conv_layer(\n            self.conv_cfg,\n            in_channels=prev_channel,\n            out_channels=self.bbox_coder.code_size * self.num_classes,\n            kernel_size=1)\n\n        if init_cfg is None:\n            self.init_cfg = dict(type='Xavier', layer=['Conv2d', 'Conv1d'])\n\n    def _add_conv_branch(self, in_channels, conv_channels):\n        \"\"\"Add shared or separable branch.\n\n        Args:\n            in_channels (int): Input feature channel.\n            conv_channels (tuple): Middle feature channels.\n        \"\"\"\n        conv_spec = [in_channels] + list(conv_channels)\n        # add branch specific conv layers\n        conv_layers = nn.Sequential()\n        for i in range(len(conv_spec) - 1):\n            conv_layers.add_module(\n                f'layer{i}',\n                ConvModule(\n                    conv_spec[i],\n                    conv_spec[i + 1],\n                    kernel_size=1,\n                    padding=0,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    act_cfg=self.act_cfg,\n                    bias=self.bias,\n                    inplace=True))\n        return conv_layers\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\"\"\"\n        super().init_weights()\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n        normal_init(self.conv_reg.weight, mean=0, std=0.001)\n\n    def forward(self, feats):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (torch.Torch): Features from RCNN modules.\n\n        Returns:\n            tuple[torch.Tensor]: Score of class and bbox predictions.\n        \"\"\"\n        input_data = feats.clone().detach()\n        xyz_input = input_data[..., 0:self.in_channels].transpose(\n            1, 2).unsqueeze(dim=3).contiguous().clone().detach()\n        xyz_features = self.xyz_up_layer(xyz_input)\n        rpn_features = input_data[..., self.in_channels:].transpose(\n            1, 2).unsqueeze(dim=3)\n        merged_features = torch.cat((xyz_features, rpn_features), dim=1)\n        merged_features = self.merge_down_layer(merged_features)\n        l_xyz, l_features = [input_data[..., 0:3].contiguous()], \\\n                            [merged_features.squeeze(dim=3)]\n        for i in range(len(self.SA_modules)):\n            li_xyz, li_features, cur_indices = \\\n                self.SA_modules[i](l_xyz[i], l_features[i])\n            l_xyz.append(li_xyz)\n            l_features.append(li_features)\n\n        shared_features = l_features[-1]\n        x_cls = shared_features\n        x_reg = shared_features\n        x_cls = self.cls_convs(x_cls)\n        rcnn_cls = self.conv_cls(x_cls)\n        x_reg = self.reg_convs(x_reg)\n        rcnn_reg = self.conv_reg(x_reg)\n        rcnn_cls = rcnn_cls.transpose(1, 2).contiguous().squeeze(dim=1)\n        rcnn_reg = rcnn_reg.transpose(1, 2).contiguous().squeeze(dim=1)\n        return rcnn_cls, rcnn_reg\n\n    def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,\n             pos_gt_bboxes, reg_mask, 
label_weights, bbox_weights):\n        \"\"\"Computing losses.\n\n        Args:\n            cls_score (torch.Tensor): Scores of each RoI.\n            bbox_pred (torch.Tensor): Predictions of bboxes.\n            rois (torch.Tensor): RoI bboxes.\n            labels (torch.Tensor): Labels of class.\n            bbox_targets (torch.Tensor): Target of positive bboxes.\n            pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.\n            reg_mask (torch.Tensor): Mask for positive bboxes.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_weights (torch.Tensor): Weights of bbox loss.\n\n        Returns:\n            dict: Computed losses.\n\n                - loss_cls (torch.Tensor): Loss of classes.\n                - loss_bbox (torch.Tensor): Loss of bboxes.\n                - loss_corner (torch.Tensor): Loss of corners.\n        \"\"\"\n        losses = dict()\n        rcnn_batch_size = cls_score.shape[0]\n        # calculate class loss\n        cls_flat = cls_score.view(-1)\n        loss_cls = self.loss_cls(cls_flat, labels, label_weights)\n        losses['loss_cls'] = loss_cls\n\n        # calculate regression loss\n        code_size = self.bbox_coder.code_size\n        pos_inds = (reg_mask > 0)\n\n        pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds].clone()\n        bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(\n            1, pos_bbox_pred.shape[-1])\n        loss_bbox = self.loss_bbox(\n            pos_bbox_pred.unsqueeze(dim=0),\n            bbox_targets.unsqueeze(dim=0).detach(),\n            bbox_weights_flat.unsqueeze(dim=0))\n        losses['loss_bbox'] = loss_bbox\n\n        if pos_inds.any() != 0 and self.with_corner_loss:\n            rois = rois.detach()\n            pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]\n            pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)\n            batch_anchors = pos_roi_boxes3d.clone().detach()\n            pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)\n            roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)\n            batch_anchors[..., 0:3] = 0\n            # decode boxes\n            pred_boxes3d = self.bbox_coder.decode(\n                batch_anchors,\n                pos_bbox_pred.view(-1, code_size)).view(-1, code_size)\n\n            pred_boxes3d[..., 0:3] = rotation_3d_in_axis(\n                pred_boxes3d[..., 0:3].unsqueeze(1), (pos_rois_rotation),\n                axis=2).squeeze(1)\n\n            pred_boxes3d[:, 0:3] += roi_xyz\n\n            # calculate corner loss\n            loss_corner = self.get_corner_loss_lidar(pred_boxes3d,\n                                                     pos_gt_bboxes)\n\n            losses['loss_corner'] = loss_corner\n        else:\n            losses['loss_corner'] = loss_cls.new_tensor(0)\n\n        return losses\n\n    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):\n        \"\"\"Calculate corner loss of given boxes.\n\n        Args:\n            pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).\n            gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).\n            delta (float, optional): huber loss threshold. 
Defaults to 1.0\n\n        Returns:\n            torch.FloatTensor: Calculated corner loss in shape (N).\n        \"\"\"\n        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]\n\n        # This is a little bit hack here because we assume the box for\n        # PointRCNN is in LiDAR coordinates\n\n        gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)\n        pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners\n        gt_box_corners = gt_boxes_structure.corners\n\n        # This flip only changes the heading direction of GT boxes\n        gt_bbox3d_flip = gt_boxes_structure.clone()\n        gt_bbox3d_flip.tensor[:, 6] += np.pi\n        gt_box_corners_flip = gt_bbox3d_flip.corners\n\n        corner_dist = torch.min(\n            torch.norm(pred_box_corners - gt_box_corners, dim=2),\n            torch.norm(pred_box_corners - gt_box_corners_flip, dim=2))\n        # huber loss\n        abs_error = corner_dist.abs()\n        quadratic = abs_error.clamp(max=delta)\n        linear = (abs_error - quadratic)\n        corner_loss = 0.5 * quadratic**2 + delta * linear\n        return corner_loss.mean(dim=1)\n\n    def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):\n        \"\"\"Generate targets.\n\n        Args:\n            sampling_results (list[:obj:`SamplingResult`]):\n                Sampled results from rois.\n            rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.\n            concat (bool, optional): Whether to concatenate targets between\n                batches. Defaults to True.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of boxes and class prediction.\n        \"\"\"\n        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]\n        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]\n        iou_list = [res.iou for res in sampling_results]\n        targets = multi_apply(\n            self._get_target_single,\n            pos_bboxes_list,\n            pos_gt_bboxes_list,\n            iou_list,\n            cfg=rcnn_train_cfg)\n        (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n         bbox_weights) = targets\n\n        if concat:\n            label = torch.cat(label, 0)\n            bbox_targets = torch.cat(bbox_targets, 0)\n            pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)\n            reg_mask = torch.cat(reg_mask, 0)\n\n            label_weights = torch.cat(label_weights, 0)\n            label_weights /= torch.clamp(label_weights.sum(), min=1.0)\n\n            bbox_weights = torch.cat(bbox_weights, 0)\n            bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)\n\n        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n\n    def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            pos_bboxes (torch.Tensor): Positive boxes with shape\n                (N, 7).\n            pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape\n                (M, 7).\n            ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`\n                in shape (N, M).\n            cfg (dict): Training configs.\n\n        Returns:\n            tuple[torch.Tensor]: Target for positive boxes.\n                (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n        \"\"\"\n        cls_pos_mask = ious > cfg.cls_pos_thr\n        cls_neg_mask = ious < 
cfg.cls_neg_thr\n        interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)\n        # iou regression target\n        label = (cls_pos_mask > 0).float()\n        label[interval_mask] = (ious[interval_mask] - cfg.cls_neg_thr) / \\\n            (cfg.cls_pos_thr - cfg.cls_neg_thr)\n        # label weights\n        label_weights = (label >= 0).float()\n        # box regression target\n        reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()\n        reg_mask[0:pos_gt_bboxes.size(0)] = 1\n        bbox_weights = (reg_mask > 0).float()\n        if reg_mask.bool().any():\n            pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()\n            roi_center = pos_bboxes[..., 0:3]\n            roi_ry = pos_bboxes[..., 6] % (2 * np.pi)\n\n            # canonical transformation\n            pos_gt_bboxes_ct[..., 0:3] -= roi_center\n            pos_gt_bboxes_ct[..., 6] -= roi_ry\n            pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(\n                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -(roi_ry),\n                axis=2).squeeze(1)\n\n            # flip orientation if gt have opposite orientation\n            ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi\n            is_opposite = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)\n            ry_label[is_opposite] = (ry_label[is_opposite] + np.pi) % (\n                2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)\n            flag = ry_label > np.pi\n            ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)\n            ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)\n            pos_gt_bboxes_ct[..., 6] = ry_label\n\n            rois_anchor = pos_bboxes.clone().detach()\n            rois_anchor[:, 0:3] = 0\n            rois_anchor[:, 6] = 0\n            bbox_targets = self.bbox_coder.encode(rois_anchor,\n                                                  pos_gt_bboxes_ct)\n        else:\n            # no fg bbox\n            bbox_targets = pos_gt_bboxes.new_empty((0, 7))\n\n        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n\n    def get_bboxes(self,\n                   rois,\n                   cls_score,\n                   bbox_pred,\n                   class_labels,\n                   img_metas,\n                   cfg=None):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            rois (torch.Tensor): RoI bounding boxes.\n            cls_score (torch.Tensor): Scores of bounding boxes.\n            bbox_pred (torch.Tensor): Bounding boxes predictions\n            class_labels (torch.Tensor): Label of classes\n            img_metas (list[dict]): Point cloud and image's meta info.\n            cfg (:obj:`ConfigDict`, optional): Testing config.\n                Defaults to None.\n\n        Returns:\n            list[tuple]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        roi_batch_id = rois[..., 0]\n        roi_boxes = rois[..., 1:]  # boxes without batch id\n        batch_size = int(roi_batch_id.max().item() + 1)\n\n        # decode boxes\n        roi_ry = roi_boxes[..., 6].view(-1)\n        roi_xyz = roi_boxes[..., 0:3].view(-1, 3)\n        local_roi_boxes = roi_boxes.clone().detach()\n        local_roi_boxes[..., 0:3] = 0\n        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)\n        rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(\n            rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)\n        rcnn_boxes3d[:, 0:3] += roi_xyz\n\n        # 
post processing\n        result_list = []\n        for batch_id in range(batch_size):\n            cur_class_labels = class_labels[batch_id]\n            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)\n\n            cur_box_prob = cur_cls_score.unsqueeze(1)\n            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]\n            keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,\n                                        cfg.score_thr, cfg.nms_thr,\n                                        img_metas[batch_id],\n                                        cfg.use_rotate_nms)\n            selected_bboxes = cur_rcnn_boxes3d[keep]\n            selected_label_preds = cur_class_labels[keep]\n            selected_scores = cur_cls_score[keep]\n\n            result_list.append(\n                (img_metas[batch_id]['box_type_3d'](selected_bboxes,\n                                                    self.bbox_coder.code_size),\n                 selected_scores, selected_label_preds))\n        return result_list\n\n    def multi_class_nms(self,\n                        box_probs,\n                        box_preds,\n                        score_thr,\n                        nms_thr,\n                        input_meta,\n                        use_rotate_nms=True):\n        \"\"\"Multi-class NMS for box head.\n\n        Note:\n            This function has large overlap with the `box3d_multiclass_nms`\n            implemented in `mmdet3d.core.post_processing`. We are considering\n            merging these two functions in the future.\n\n        Args:\n            box_probs (torch.Tensor): Predicted boxes probabilities in\n                shape (N,).\n            box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).\n            score_thr (float): Threshold of scores.\n            nms_thr (float): Threshold for NMS.\n            input_meta (dict): Meta information of the current sample.\n            use_rotate_nms (bool, optional): Whether to use rotated nms.\n                Defaults to True.\n\n        Returns:\n            torch.Tensor: Selected indices.\n        \"\"\"\n        if use_rotate_nms:\n            nms_func = nms_bev\n        else:\n            nms_func = nms_normal_bev\n\n        assert box_probs.shape[\n            1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'\n        selected_list = []\n        selected_labels = []\n        boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            box_preds, self.bbox_coder.code_size).bev)\n\n        score_thresh = score_thr if isinstance(\n            score_thr, list) else [score_thr for x in range(self.num_classes)]\n        nms_thresh = nms_thr if isinstance(\n            nms_thr, list) else [nms_thr for x in range(self.num_classes)]\n        for k in range(0, self.num_classes):\n            class_scores_keep = box_probs[:, k] >= score_thresh[k]\n\n            if class_scores_keep.int().sum() > 0:\n                original_idxs = class_scores_keep.nonzero(\n                    as_tuple=False).view(-1)\n                cur_boxes_for_nms = boxes_for_nms[class_scores_keep]\n                cur_rank_scores = box_probs[class_scores_keep, k]\n\n                cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,\n                                        nms_thresh[k])\n\n                if cur_selected.shape[0] == 0:\n                    continue\n                selected_list.append(original_idxs[cur_selected])\n                selected_labels.append(\n                    
torch.full([cur_selected.shape[0]],\n                               k + 1,\n                               dtype=torch.int64,\n                               device=box_preds.device))\n\n        if len(selected_list) > 0:\n            keep = torch.cat(selected_list, dim=0)\n        else:\n            # no box passed the per-class score threshold and NMS; return an\n            # empty index tensor so the return type matches the docstring\n            keep = box_preds.new_empty(0, dtype=torch.long)\n        return keep\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/h3d_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet3d.core.bbox import bbox3d2result\nfrom ..builder import HEADS, build_head\nfrom .base_3droi_head import Base3DRoIHead\n\n\n@HEADS.register_module()\nclass H3DRoIHead(Base3DRoIHead):\n    \"\"\"H3D roi head for H3DNet.\n\n    Args:\n        primitive_list (List): Configs of primitive heads.\n        bbox_head (ConfigDict): Config of bbox_head.\n        train_cfg (ConfigDict): Training config.\n        test_cfg (ConfigDict): Testing config.\n    \"\"\"\n\n    def __init__(self,\n                 primitive_list,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(H3DRoIHead, self).__init__(\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n        # Primitive module\n        assert len(primitive_list) == 3\n        self.primitive_z = build_head(primitive_list[0])\n        self.primitive_xy = build_head(primitive_list[1])\n        self.primitive_line = build_head(primitive_list[2])\n\n    def init_mask_head(self):\n        \"\"\"Initialize mask head, skip since ``H3DROIHead`` does not have\n        one.\"\"\"\n        pass\n\n    def init_bbox_head(self, bbox_head):\n        \"\"\"Initialize box head.\"\"\"\n        bbox_head['train_cfg'] = self.train_cfg\n        bbox_head['test_cfg'] = self.test_cfg\n        self.bbox_head = build_head(bbox_head)\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        pass\n\n    def forward_train(self,\n                      feats_dict,\n                      img_metas,\n                      points,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask,\n                      pts_instance_mask,\n                      gt_bboxes_ignore=None):\n        \"\"\"Training forward function of PartAggregationROIHead.\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (list[torch.Tensor]): Point-wise\n                instance mask.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding boxes to ignore.\n\n        Returns:\n            dict: losses from each head.\n        \"\"\"\n        losses = dict()\n\n        sample_mod = self.train_cfg.sample_mod\n        assert sample_mod in ['vote', 'seed', 'random']\n        result_z = self.primitive_z(feats_dict, sample_mod)\n        feats_dict.update(result_z)\n\n        result_xy = self.primitive_xy(feats_dict, sample_mod)\n        feats_dict.update(result_xy)\n\n        result_line = self.primitive_line(feats_dict, sample_mod)\n        feats_dict.update(result_line)\n\n        primitive_loss_inputs = (feats_dict, points, gt_bboxes_3d,\n                                 gt_labels_3d, pts_semantic_mask,\n                                 pts_instance_mask, img_metas,\n                          
       gt_bboxes_ignore)\n\n        loss_z = self.primitive_z.loss(*primitive_loss_inputs)\n        losses.update(loss_z)\n\n        loss_xy = self.primitive_xy.loss(*primitive_loss_inputs)\n        losses.update(loss_xy)\n\n        loss_line = self.primitive_line.loss(*primitive_loss_inputs)\n        losses.update(loss_line)\n\n        targets = feats_dict.pop('targets')\n\n        bbox_results = self.bbox_head(feats_dict, sample_mod)\n\n        feats_dict.update(bbox_results)\n        bbox_loss = self.bbox_head.loss(feats_dict, points, gt_bboxes_3d,\n                                        gt_labels_3d, pts_semantic_mask,\n                                        pts_instance_mask, img_metas, targets,\n                                        gt_bboxes_ignore)\n        losses.update(bbox_loss)\n\n        return losses\n\n    def simple_test(self, feats_dict, img_metas, points, rescale=False):\n        \"\"\"Simple testing forward function of PartAggregationROIHead.\n\n        Note:\n            This function assumes that the batch size is 1\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            points (torch.Tensor): Input points.\n            rescale (bool): Whether to rescale results.\n\n        Returns:\n            dict: Bbox results of one frame.\n        \"\"\"\n        sample_mod = self.test_cfg.sample_mod\n        assert sample_mod in ['vote', 'seed', 'random']\n\n        result_z = self.primitive_z(feats_dict, sample_mod)\n        feats_dict.update(result_z)\n\n        result_xy = self.primitive_xy(feats_dict, sample_mod)\n        feats_dict.update(result_xy)\n\n        result_line = self.primitive_line(feats_dict, sample_mod)\n        feats_dict.update(result_line)\n\n        bbox_preds = self.bbox_head(feats_dict, sample_mod)\n        feats_dict.update(bbox_preds)\n        bbox_list = self.bbox_head.get_bboxes(\n            points,\n            feats_dict,\n            img_metas,\n            rescale=rescale,\n            suffix='_optimized')\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/mask_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .pointwise_semantic_head import PointwiseSemanticHead\nfrom .primitive_head import PrimitiveHead\n\n__all__ = ['PointwiseSemanticHead', 'PrimitiveHead']\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet.core import multi_apply\n\n\n@HEADS.register_module()\nclass PointwiseSemanticHead(BaseModule):\n    \"\"\"Semantic segmentation head for point-wise segmentation.\n\n    Predict point-wise segmentation and part regression results for PartA2.\n    See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.\n\n    Args:\n        in_channels (int): The number of input channel.\n        num_classes (int): The number of class.\n        extra_width (float): Boxes enlarge width.\n        loss_seg (dict): Config of segmentation loss.\n        loss_part (dict): Config of part prediction loss.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_classes=3,\n                 extra_width=0.2,\n                 seg_score_thr=0.3,\n                 init_cfg=None,\n                 loss_seg=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     reduction='sum',\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_part=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0)):\n        super(PointwiseSemanticHead, self).__init__(init_cfg=init_cfg)\n        self.extra_width = extra_width\n        self.num_classes = num_classes\n        self.seg_score_thr = seg_score_thr\n        self.seg_cls_layer = nn.Linear(in_channels, 1, bias=True)\n        self.seg_reg_layer = nn.Linear(in_channels, 3, bias=True)\n\n        self.loss_seg = build_loss(loss_seg)\n        self.loss_part = build_loss(loss_part)\n\n    def forward(self, x):\n        \"\"\"Forward pass.\n\n        Args:\n            x (torch.Tensor): Features from the first stage.\n\n        Returns:\n            dict: Part features, segmentation and part predictions.\n\n                - seg_preds (torch.Tensor): Segment predictions.\n                - part_preds (torch.Tensor): Part predictions.\n                - part_feats (torch.Tensor): Feature predictions.\n        \"\"\"\n        seg_preds = self.seg_cls_layer(x)  # (N, 1)\n        part_preds = self.seg_reg_layer(x)  # (N, 3)\n\n        seg_scores = torch.sigmoid(seg_preds).detach()\n        seg_mask = (seg_scores > self.seg_score_thr)\n\n        part_offsets = torch.sigmoid(part_preds).clone().detach()\n        part_offsets[seg_mask.view(-1) == 0] = 0\n        part_feats = torch.cat((part_offsets, seg_scores),\n                               dim=-1)  # shape (npoints, 4)\n        return dict(\n            seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)\n\n    def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"generate segmentation and part prediction targets for a single\n        sample.\n\n        Args:\n            voxel_centers (torch.Tensor): The center of voxels in shape\n                (voxel_num, 3).\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in\n                shape (box_num, 7).\n            gt_labels_3d (torch.Tensor): Class labels of ground truths in\n                shape (box_num).\n\n        Returns:\n            tuple[torch.Tensor]: Segmentation 
targets with shape [voxel_num]\n                part prediction targets with shape [voxel_num, 3]\n        \"\"\"\n        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)\n        enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)\n\n        part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),\n                                               dtype=torch.float32)\n        box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers)\n        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes_part(\n            voxel_centers).long()\n\n        gt_labels_pad = F.pad(\n            gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)\n        seg_targets = gt_labels_pad[(box_idx.long() + 1)]\n        fg_pt_flag = box_idx > -1\n        ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)\n        seg_targets[ignore_flag] = -1\n\n        for k in range(len(gt_bboxes_3d)):\n            k_box_flag = box_idx == k\n            # no point in current box (caused by velodyne reduce)\n            if not k_box_flag.any():\n                continue\n            fg_voxels = voxel_centers[k_box_flag]\n            transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]\n            transformed_voxels = rotation_3d_in_axis(\n                transformed_voxels.unsqueeze(0),\n                -gt_bboxes_3d.yaw[k].view(1),\n                axis=2)\n            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[\n                k] + voxel_centers.new_tensor([0.5, 0.5, 0])\n\n        part_targets = torch.clamp(part_targets, min=0)\n        return seg_targets, part_targets\n\n    def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"generate segmentation and part prediction targets.\n\n        Args:\n            voxel_centers (torch.Tensor): The center of voxels in shape\n                (voxel_num, 3).\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in\n                shape (box_num, 7).\n            gt_labels_3d (torch.Tensor): Class labels of ground truths in\n                shape (box_num).\n\n        Returns:\n            dict: Prediction targets\n\n                - seg_targets (torch.Tensor): Segmentation targets\n                    with shape [voxel_num].\n                - part_targets (torch.Tensor): Part prediction targets\n                    with shape [voxel_num, 3].\n        \"\"\"\n        batch_size = len(gt_labels_3d)\n        voxel_center_list = []\n        for idx in range(batch_size):\n            coords_idx = voxels_dict['coors'][:, 0] == idx\n            voxel_center_list.append(voxels_dict['voxel_centers'][coords_idx])\n\n        seg_targets, part_targets = multi_apply(self.get_targets_single,\n                                                voxel_center_list,\n                                                gt_bboxes_3d, gt_labels_3d)\n        seg_targets = torch.cat(seg_targets, dim=0)\n        part_targets = torch.cat(part_targets, dim=0)\n        return dict(seg_targets=seg_targets, part_targets=part_targets)\n\n    def loss(self, semantic_results, semantic_targets):\n        \"\"\"Calculate point-wise segmentation and part prediction losses.\n\n        Args:\n            semantic_results (dict): Results from semantic head.\n\n                - seg_preds: Segmentation predictions.\n                - part_preds: Part predictions.\n\n            semantic_targets (dict): Targets of semantic results.\n\n                - seg_preds: Segmentation targets.\n                - 
part_preds: Part targets.\n\n        Returns:\n            dict: Loss of segmentation and part prediction.\n\n                - loss_seg (torch.Tensor): Segmentation prediction loss.\n                - loss_part (torch.Tensor): Part prediction loss.\n        \"\"\"\n        seg_preds = semantic_results['seg_preds']\n        part_preds = semantic_results['part_preds']\n        seg_targets = semantic_targets['seg_targets']\n        part_targets = semantic_targets['part_targets']\n\n        pos_mask = (seg_targets > -1) & (seg_targets < self.num_classes)\n        binary_seg_target = pos_mask.long()\n        pos = pos_mask.float()\n        neg = (seg_targets == self.num_classes).float()\n        seg_weights = pos + neg\n        pos_normalizer = pos.sum()\n        seg_weights = seg_weights / torch.clamp(pos_normalizer, min=1.0)\n        loss_seg = self.loss_seg(seg_preds, binary_seg_target, seg_weights)\n\n        if pos_normalizer > 0:\n            loss_part = self.loss_part(part_preds[pos_mask],\n                                       part_targets[pos_mask])\n        else:\n            # fake a part loss\n            loss_part = loss_seg.new_tensor(0)\n\n        return dict(loss_seg=loss_seg, loss_part=loss_part)\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/mask_heads/primitive_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import furthest_point_sample\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet3d.models.model_utils import VoteModule\nfrom mmdet3d.ops import build_sa_module\nfrom mmdet.core import multi_apply\n\n\n@HEADS.register_module()\nclass PrimitiveHead(BaseModule):\n    r\"\"\"Primitive head of `H3DNet <https://arxiv.org/abs/2006.05682>`_.\n\n    Args:\n        num_dims (int): The dimension of primitive semantic information.\n        num_classes (int): The number of class.\n        primitive_mode (str): The mode of primitive module,\n            available mode ['z', 'xy', 'line'].\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        vote_module_cfg (dict): Config of VoteModule for point-wise votes.\n        vote_aggregation_cfg (dict): Config of vote aggregation layer.\n        feat_channels (tuple[int]): Convolution channels of\n            prediction layer.\n        upper_thresh (float): Threshold for line matching.\n        surface_thresh (float): Threshold for surface matching.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_dims,\n                 num_classes,\n                 primitive_mode,\n                 train_cfg=None,\n                 test_cfg=None,\n                 vote_module_cfg=None,\n                 vote_aggregation_cfg=None,\n                 feat_channels=(128, 128),\n                 upper_thresh=100.0,\n                 surface_thresh=0.5,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 semantic_reg_loss=None,\n                 semantic_cls_loss=None,\n                 init_cfg=None):\n        super(PrimitiveHead, self).__init__(init_cfg=init_cfg)\n        assert primitive_mode in ['z', 'xy', 'line']\n        # The dimension of primitive semantic information.\n        self.num_dims = num_dims\n        self.num_classes = num_classes\n        self.primitive_mode = primitive_mode\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.gt_per_seed = vote_module_cfg['gt_per_seed']\n        self.num_proposal = vote_aggregation_cfg['num_point']\n        self.upper_thresh = upper_thresh\n        self.surface_thresh = surface_thresh\n\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.semantic_reg_loss = build_loss(semantic_reg_loss)\n        self.semantic_cls_loss = build_loss(semantic_cls_loss)\n\n        assert vote_aggregation_cfg['mlp_channels'][0] == vote_module_cfg[\n            'in_channels']\n\n        # Primitive existence flag prediction\n        self.flag_conv = ConvModule(\n            vote_module_cfg['conv_channels'][-1],\n            vote_module_cfg['conv_channels'][-1] // 2,\n            1,\n            
padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=True,\n            inplace=True)\n        self.flag_pred = torch.nn.Conv1d(\n            vote_module_cfg['conv_channels'][-1] // 2, 2, 1)\n\n        self.vote_module = VoteModule(**vote_module_cfg)\n        self.vote_aggregation = build_sa_module(vote_aggregation_cfg)\n\n        prev_channel = vote_aggregation_cfg['mlp_channels'][-1]\n        conv_pred_list = list()\n        for k in range(len(feat_channels)):\n            conv_pred_list.append(\n                ConvModule(\n                    prev_channel,\n                    feat_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=True))\n            prev_channel = feat_channels[k]\n        self.conv_pred = nn.Sequential(*conv_pred_list)\n\n        conv_out_channel = 3 + num_dims + num_classes\n        self.conv_pred.add_module('conv_out',\n                                  nn.Conv1d(prev_channel, conv_out_channel, 1))\n\n    def forward(self, feats_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Args:\n            feats_dict (dict): Feature dict from backbone.\n            sample_mod (str): Sample mode for vote aggregation layer.\n                valid modes are \"vote\", \"seed\" and \"random\".\n\n        Returns:\n            dict: Predictions of primitive head.\n        \"\"\"\n        assert sample_mod in ['vote', 'seed', 'random']\n\n        seed_points = feats_dict['fp_xyz_net0'][-1]\n        seed_features = feats_dict['hd_feature']\n        results = {}\n\n        primitive_flag = self.flag_conv(seed_features)\n        primitive_flag = self.flag_pred(primitive_flag)\n\n        results['pred_flag_' + self.primitive_mode] = primitive_flag\n\n        # 1. generate vote_points from seed_points\n        vote_points, vote_features, _ = self.vote_module(\n            seed_points, seed_features)\n        results['vote_' + self.primitive_mode] = vote_points\n        results['vote_features_' + self.primitive_mode] = vote_features\n\n        # 2. 
aggregate vote_points\n        if sample_mod == 'vote':\n            # use fps in vote_aggregation\n            sample_indices = None\n        elif sample_mod == 'seed':\n            # FPS on seed and choose the votes corresponding to the seeds\n            sample_indices = furthest_point_sample(seed_points,\n                                                   self.num_proposal)\n        elif sample_mod == 'random':\n            # Random sampling from the votes\n            batch_size, num_seed = seed_points.shape[:2]\n            sample_indices = torch.randint(\n                0,\n                num_seed, (batch_size, self.num_proposal),\n                dtype=torch.int32,\n                device=seed_points.device)\n        else:\n            raise NotImplementedError('Unsupported sample mod!')\n\n        vote_aggregation_ret = self.vote_aggregation(vote_points,\n                                                     vote_features,\n                                                     sample_indices)\n        aggregated_points, features, aggregated_indices = vote_aggregation_ret\n        results['aggregated_points_' + self.primitive_mode] = aggregated_points\n        results['aggregated_features_' + self.primitive_mode] = features\n        results['aggregated_indices_' +\n                self.primitive_mode] = aggregated_indices\n\n        # 3. predict primitive offsets and semantic information\n        predictions = self.conv_pred(features)\n\n        # 4. decode predictions\n        decode_ret = self.primitive_decode_scores(predictions,\n                                                  aggregated_points)\n        results.update(decode_ret)\n\n        center, pred_ind = self.get_primitive_center(\n            primitive_flag, decode_ret['center_' + self.primitive_mode])\n\n        results['pred_' + self.primitive_mode + '_ind'] = pred_ind\n        results['pred_' + self.primitive_mode + '_center'] = center\n        return results\n\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of primitive head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses of Primitive Head.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n\n        (point_mask, point_offset, gt_primitive_center, gt_primitive_semantic,\n         gt_sem_cls_label, gt_primitive_mask) = targets\n\n        losses = {}\n        # Compute the loss of primitive existence flag\n        pred_flag = bbox_preds['pred_flag_' + self.primitive_mode]\n        flag_loss = 
self.objectness_loss(pred_flag, gt_primitive_mask.long())\n        losses['flag_loss_' + self.primitive_mode] = flag_loss\n\n        # calculate vote loss\n        vote_loss = self.vote_module.get_loss(\n            bbox_preds['seed_points'],\n            bbox_preds['vote_' + self.primitive_mode],\n            bbox_preds['seed_indices'], point_mask, point_offset)\n        losses['vote_loss_' + self.primitive_mode] = vote_loss\n\n        num_proposal = bbox_preds['aggregated_points_' +\n                                  self.primitive_mode].shape[1]\n        primitive_center = bbox_preds['center_' + self.primitive_mode]\n        if self.primitive_mode != 'line':\n            primitive_semantic = bbox_preds['size_residuals_' +\n                                            self.primitive_mode].contiguous()\n        else:\n            primitive_semantic = None\n        semancitc_scores = bbox_preds['sem_cls_scores_' +\n                                      self.primitive_mode].transpose(2, 1)\n\n        gt_primitive_mask = gt_primitive_mask / \\\n            (gt_primitive_mask.sum() + 1e-6)\n        center_loss, size_loss, sem_cls_loss = self.compute_primitive_loss(\n            primitive_center, primitive_semantic, semancitc_scores,\n            num_proposal, gt_primitive_center, gt_primitive_semantic,\n            gt_sem_cls_label, gt_primitive_mask)\n        losses['center_loss_' + self.primitive_mode] = center_loss\n        losses['size_loss_' + self.primitive_mode] = size_loss\n        losses['sem_loss_' + self.primitive_mode] = sem_cls_loss\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of primitive head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (dict): Predictions from forward of primitive head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of primitive head.\n        \"\"\"\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        (point_mask, point_sem,\n         point_offset) = multi_apply(self.get_targets_single, points,\n                                     gt_bboxes_3d, gt_labels_3d,\n                                     pts_semantic_mask, pts_instance_mask)\n\n        point_mask = torch.stack(point_mask)\n        point_sem = torch.stack(point_sem)\n        point_offset = torch.stack(point_offset)\n\n        batch_size = 
point_mask.shape[0]\n        num_proposal = bbox_preds['aggregated_points_' +\n                                  self.primitive_mode].shape[1]\n        num_seed = bbox_preds['seed_points'].shape[1]\n        seed_inds = bbox_preds['seed_indices'].long()\n        seed_inds_expand = seed_inds.view(batch_size, num_seed,\n                                          1).repeat(1, 1, 3)\n        seed_gt_votes = torch.gather(point_offset, 1, seed_inds_expand)\n        seed_gt_votes += bbox_preds['seed_points']\n        gt_primitive_center = seed_gt_votes.view(batch_size * num_proposal, 1,\n                                                 3)\n\n        seed_inds_expand_sem = seed_inds.view(batch_size, num_seed, 1).repeat(\n            1, 1, 4 + self.num_dims)\n        seed_gt_sem = torch.gather(point_sem, 1, seed_inds_expand_sem)\n        gt_primitive_semantic = seed_gt_sem[:, :, 3:3 + self.num_dims].view(\n            batch_size * num_proposal, 1, self.num_dims).contiguous()\n\n        gt_sem_cls_label = seed_gt_sem[:, :, -1].long()\n\n        gt_votes_mask = torch.gather(point_mask, 1, seed_inds)\n\n        return (point_mask, point_offset, gt_primitive_center,\n                gt_primitive_semantic, gt_sem_cls_label, gt_votes_mask)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None):\n        \"\"\"Generate targets of primitive head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (torch.Tensor): Point-wise instance\n                label of each batch.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of primitive head.\n        \"\"\"\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n        num_points = points.shape[0]\n\n        point_mask = points.new_zeros(num_points)\n        # Offset to the primitive center\n        point_offset = points.new_zeros([num_points, 3])\n        # Semantic information of primitive center\n        point_sem = points.new_zeros([num_points, 3 + self.num_dims + 1])\n\n        # Generate pts_semantic_mask and pts_instance_mask when they are None\n        if pts_semantic_mask is None or pts_instance_mask is None:\n            points2box_mask = gt_bboxes_3d.points_in_boxes_all(points)\n            assignment = points2box_mask.argmax(1)\n            background_mask = points2box_mask.max(1)[0] == 0\n\n            if pts_semantic_mask is None:\n                pts_semantic_mask = gt_labels_3d[assignment]\n                pts_semantic_mask[background_mask] = self.num_classes\n\n            if pts_instance_mask is None:\n                pts_instance_mask = assignment\n                pts_instance_mask[background_mask] = gt_labels_3d.shape[0]\n\n        instance_flag = torch.nonzero(\n            pts_semantic_mask != self.num_classes, as_tuple=False).squeeze(1)\n        instance_labels = pts_instance_mask[instance_flag].unique()\n\n        with_yaw = gt_bboxes_3d.with_yaw\n        for i, i_instance in enumerate(instance_labels):\n            indices = instance_flag[pts_instance_mask[instance_flag] ==\n         
                           i_instance]\n            coords = points[indices, :3]\n            cur_cls_label = pts_semantic_mask[indices][0]\n\n            # Bbox Corners\n            cur_corners = gt_bboxes_3d.corners[i]\n\n            plane_lower_temp = points.new_tensor(\n                [0, 0, 1, -cur_corners[7, -1]])\n            upper_points = cur_corners[[1, 2, 5, 6]]\n            refined_distance = (upper_points * plane_lower_temp[:3]).sum(dim=1)\n\n            if self.check_horizon(upper_points) and \\\n                    plane_lower_temp[0] + plane_lower_temp[1] < \\\n                    self.train_cfg['lower_thresh']:\n                plane_lower = points.new_tensor(\n                    [0, 0, 1, plane_lower_temp[-1]])\n                plane_upper = points.new_tensor(\n                    [0, 0, 1, -torch.mean(refined_distance)])\n            else:\n                raise NotImplementedError('Only horizontal plane is support!')\n\n            if self.check_dist(plane_upper, upper_points) is False:\n                raise NotImplementedError(\n                    'Mean distance to plane should be lower than thresh!')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_lower, coords)\n\n            # Get bottom four lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='bottom')\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(point_mask,\n                                                        point_offset,\n                                                        point_sem,\n                                                        coords[selected],\n                                                        indices[selected],\n                                                        cur_cls_label,\n                                                        point2line_matching,\n                                                        cur_corners,\n                                                        [1, 1, 0, 0],\n                                                        with_yaw,\n                                                        mode='bottom')\n\n            # Set the surface labels here\n            if self.primitive_mode == 'z' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(point_mask,\n                                                           point_offset,\n                                                           point_sem,\n                                                           coords[selected],\n                                                           indices[selected],\n                                                           cur_cls_label,\n                                                           cur_corners,\n                                                           with_yaw,\n                                                           mode='bottom')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_upper, coords)\n\n            # Get top four 
lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='top')\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(point_mask,\n                                                        point_offset,\n                                                        point_sem,\n                                                        coords[selected],\n                                                        indices[selected],\n                                                        cur_cls_label,\n                                                        point2line_matching,\n                                                        cur_corners,\n                                                        [1, 1, 0, 0],\n                                                        with_yaw,\n                                                        mode='top')\n\n            if self.primitive_mode == 'z' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(point_mask,\n                                                           point_offset,\n                                                           point_sem,\n                                                           coords[selected],\n                                                           indices[selected],\n                                                           cur_cls_label,\n                                                           cur_corners,\n                                                           with_yaw,\n                                                           mode='top')\n\n            # Get left two lines\n            plane_left_temp = self._get_plane_fomulation(\n                cur_corners[2] - cur_corners[3],\n                cur_corners[3] - cur_corners[0], cur_corners[0])\n\n            right_points = cur_corners[[4, 5, 7, 6]]\n            plane_left_temp /= torch.norm(plane_left_temp[:3])\n            refined_distance = (right_points * plane_left_temp[:3]).sum(dim=1)\n\n            if plane_left_temp[2] < self.train_cfg['lower_thresh']:\n                plane_left = plane_left_temp\n                plane_right = points.new_tensor([\n                    plane_left_temp[0], plane_left_temp[1], plane_left_temp[2],\n                    -refined_distance.mean()\n                ])\n            else:\n                raise NotImplementedError(\n                    'Normal vector of the plane should be horizontal!')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_left, coords)\n\n            # Get left four lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='left')\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        point2line_matching[2:], cur_corners, [2, 2],\n  
                      with_yaw, mode='left')\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='left')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_right, coords)\n\n            # Get right four lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='right')\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        point2line_matching[2:], cur_corners, [2, 2],\n                        with_yaw, mode='right')\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='right')\n\n            plane_front_temp = self._get_plane_fomulation(\n                cur_corners[0] - cur_corners[4],\n                cur_corners[4] - cur_corners[5], cur_corners[5])\n\n            back_points = cur_corners[[3, 2, 7, 6]]\n            plane_front_temp /= torch.norm(plane_front_temp[:3])\n            refined_distance = (back_points * plane_front_temp[:3]).sum(dim=1)\n\n            if plane_front_temp[2] < self.train_cfg['lower_thresh']:\n                plane_front = plane_front_temp\n                plane_back = points.new_tensor([\n                    plane_front_temp[0], plane_front_temp[1],\n                    plane_front_temp[2], -torch.mean(refined_distance)\n                ])\n            else:\n                raise NotImplementedError(\n                    'Normal vector of the plane should be horizontal!')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_front, coords)\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    (point2plane_dist[selected]).var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='front')\n\n            # Get the boundary points here\n            point2plane_dist, selected = 
self.match_point2plane(\n                plane_back, coords)\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='back')\n\n        return (point_mask, point_sem, point_offset)\n\n    def primitive_decode_scores(self, predictions, aggregated_points):\n        \"\"\"Decode predicted parts to primitive head.\n\n        Args:\n            predictions (torch.Tensor): primitive pridictions of each batch.\n            aggregated_points (torch.Tensor): The aggregated points\n                of vote stage.\n\n        Returns:\n            Dict: Predictions of primitive head, including center,\n                semantic size and semantic scores.\n        \"\"\"\n\n        ret_dict = {}\n        pred_transposed = predictions.transpose(2, 1)\n\n        center = aggregated_points + pred_transposed[:, :, 0:3]\n        ret_dict['center_' + self.primitive_mode] = center\n\n        if self.primitive_mode in ['z', 'xy']:\n            ret_dict['size_residuals_' + self.primitive_mode] = \\\n                pred_transposed[:, :, 3:3 + self.num_dims]\n\n        ret_dict['sem_cls_scores_' + self.primitive_mode] = \\\n            pred_transposed[:, :, 3 + self.num_dims:]\n\n        return ret_dict\n\n    def check_horizon(self, points):\n        \"\"\"Check whether is a horizontal plane.\n\n        Args:\n            points (torch.Tensor): Points of input.\n\n        Returns:\n            Bool: Flag of result.\n        \"\"\"\n        return (points[0][-1] == points[1][-1]) and \\\n               (points[1][-1] == points[2][-1]) and \\\n               (points[2][-1] == points[3][-1])\n\n    def check_dist(self, plane_equ, points):\n        \"\"\"Whether the mean of points to plane distance is lower than thresh.\n\n        Args:\n            plane_equ (torch.Tensor): Plane to be checked.\n            points (torch.Tensor): Points to be checked.\n\n        Returns:\n            Tuple: Flag of result.\n        \"\"\"\n        return (points[:, 2] +\n                plane_equ[-1]).sum() / 4.0 < self.train_cfg['lower_thresh']\n\n    def point2line_dist(self, points, pts_a, pts_b):\n        \"\"\"Calculate the distance from point to line.\n\n        Args:\n            points (torch.Tensor): Points of input.\n            pts_a (torch.Tensor): Point on the specific line.\n            pts_b (torch.Tensor): Point on the specific line.\n\n        Returns:\n            torch.Tensor: Distance between each point to line.\n        \"\"\"\n        line_a2b = pts_b - pts_a\n        line_a2pts = points - pts_a\n        length = (line_a2pts * line_a2b.view(1, 3)).sum(1) / \\\n            line_a2b.norm()\n        dist = (line_a2pts.norm(dim=1)**2 - length**2).sqrt()\n\n        return dist\n\n    def match_point2line(self, points, corners, with_yaw, mode='bottom'):\n        \"\"\"Match points to corresponding line.\n\n        Args:\n            points (torch.Tensor): Points of input.\n            corners (torch.Tensor): Eight corners of a bounding box.\n            with_yaw (Bool): Whether the boundind box is with rotation.\n            mode 
(str, optional): Specify which line should be matched,\n                available mode are ('bottom', 'top', 'left', 'right').\n                Defaults to 'bottom'.\n\n        Returns:\n            Tuple: Flag of matching correspondence.\n        \"\"\"\n        if with_yaw:\n            corners_pair = {\n                'bottom': [[0, 3], [4, 7], [0, 4], [3, 7]],\n                'top': [[1, 2], [5, 6], [1, 5], [2, 6]],\n                'left': [[0, 1], [3, 2], [0, 1], [3, 2]],\n                'right': [[4, 5], [7, 6], [4, 5], [7, 6]]\n            }\n            selected_list = []\n            for pair_index in corners_pair[mode]:\n                selected = self.point2line_dist(\n                    points, corners[pair_index[0]], corners[pair_index[1]]) \\\n                    < self.train_cfg['line_thresh']\n                selected_list.append(selected)\n        else:\n            xmin, ymin, _ = corners.min(0)[0]\n            xmax, ymax, _ = corners.max(0)[0]\n            sel1 = torch.abs(points[:, 0] -\n                             xmin) < self.train_cfg['line_thresh']\n            sel2 = torch.abs(points[:, 0] -\n                             xmax) < self.train_cfg['line_thresh']\n            sel3 = torch.abs(points[:, 1] -\n                             ymin) < self.train_cfg['line_thresh']\n            sel4 = torch.abs(points[:, 1] -\n                             ymax) < self.train_cfg['line_thresh']\n            selected_list = [sel1, sel2, sel3, sel4]\n        return selected_list\n\n    def match_point2plane(self, plane, points):\n        \"\"\"Match points to plane.\n\n        Args:\n            plane (torch.Tensor): Equation of the plane.\n            points (torch.Tensor): Points of input.\n\n        Returns:\n            Tuple: Distance of each point to the plane and\n                flag of matching correspondence.\n        \"\"\"\n        point2plane_dist = torch.abs((points * plane[:3]).sum(dim=1) +\n                                     plane[-1])\n        min_dist = point2plane_dist.min()\n        selected = torch.abs(point2plane_dist -\n                             min_dist) < self.train_cfg['dist_thresh']\n        return point2plane_dist, selected\n\n    def compute_primitive_loss(self, primitive_center, primitive_semantic,\n                               semantic_scores, num_proposal,\n                               gt_primitive_center, gt_primitive_semantic,\n                               gt_sem_cls_label, gt_primitive_mask):\n        \"\"\"Compute loss of primitive module.\n\n        Args:\n            primitive_center (torch.Tensor): Pridictions of primitive center.\n            primitive_semantic (torch.Tensor): Pridictions of primitive\n                semantic.\n            semantic_scores (torch.Tensor): Pridictions of primitive\n                semantic scores.\n            num_proposal (int): The number of primitive proposal.\n            gt_primitive_center (torch.Tensor): Ground truth of\n                primitive center.\n            gt_votes_sem (torch.Tensor): Ground truth of primitive semantic.\n            gt_sem_cls_label (torch.Tensor): Ground truth of primitive\n                semantic class.\n            gt_primitive_mask (torch.Tensor): Ground truth of primitive mask.\n\n        Returns:\n            Tuple: Loss of primitive module.\n        \"\"\"\n        batch_size = primitive_center.shape[0]\n        vote_xyz_reshape = primitive_center.view(batch_size * num_proposal, -1,\n                                                 3)\n\n        
center_loss = self.center_loss(\n            vote_xyz_reshape,\n            gt_primitive_center,\n            dst_weight=gt_primitive_mask.view(batch_size * num_proposal, 1))[1]\n\n        if self.primitive_mode != 'line':\n            size_xyz_reshape = primitive_semantic.view(\n                batch_size * num_proposal, -1, self.num_dims).contiguous()\n            size_loss = self.semantic_reg_loss(\n                size_xyz_reshape,\n                gt_primitive_semantic,\n                dst_weight=gt_primitive_mask.view(batch_size * num_proposal,\n                                                  1))[1]\n        else:\n            size_loss = center_loss.new_tensor(0.0)\n\n        # Semantic cls loss\n        sem_cls_loss = self.semantic_cls_loss(\n            semantic_scores, gt_sem_cls_label, weight=gt_primitive_mask)\n\n        return center_loss, size_loss, sem_cls_loss\n\n    def get_primitive_center(self, pred_flag, center):\n        \"\"\"Generate primitive center from predictions.\n\n        Args:\n            pred_flag (torch.Tensor): Scores of primitive center.\n            center (torch.Tensor): Pridictions of primitive center.\n\n        Returns:\n            Tuple: Primitive center and the prediction indices.\n        \"\"\"\n        ind_normal = F.softmax(pred_flag, dim=1)\n        pred_indices = (ind_normal[:, 1, :] >\n                        self.surface_thresh).detach().float()\n        selected = (ind_normal[:, 1, :] <=\n                    self.surface_thresh).detach().float()\n        offset = torch.ones_like(center) * self.upper_thresh\n        center = center + offset * selected.unsqueeze(-1)\n        return center, pred_indices\n\n    def _assign_primitive_line_targets(self,\n                                       point_mask,\n                                       point_offset,\n                                       point_sem,\n                                       coords,\n                                       indices,\n                                       cls_label,\n                                       point2line_matching,\n                                       corners,\n                                       center_axises,\n                                       with_yaw,\n                                       mode='bottom'):\n        \"\"\"Generate targets of line primitive.\n\n        Args:\n            point_mask (torch.Tensor): Tensor to store the ground\n                truth of mask.\n            point_offset (torch.Tensor): Tensor to store the ground\n                truth of offset.\n            point_sem (torch.Tensor): Tensor to store the ground\n                truth of semantic.\n            coords (torch.Tensor): The selected points.\n            indices (torch.Tensor): Indices of the selected points.\n            cls_label (int): Class label of the ground truth bounding box.\n            point2line_matching (torch.Tensor): Flag indicate that\n                matching line of each point.\n            corners (torch.Tensor): Corners of the ground truth bounding box.\n            center_axises (list[int]): Indicate in which axis the line center\n                should be refined.\n            with_yaw (Bool): Whether the boundind box is with rotation.\n            mode (str, optional): Specify which line should be matched,\n                available mode are ('bottom', 'top', 'left', 'right').\n                Defaults to 'bottom'.\n\n        Returns:\n            Tuple: Targets of the line primitive.\n        \"\"\"\n        
corners_pair = {\n            'bottom': [[0, 3], [4, 7], [0, 4], [3, 7]],\n            'top': [[1, 2], [5, 6], [1, 5], [2, 6]],\n            'left': [[0, 1], [3, 2]],\n            'right': [[4, 5], [7, 6]]\n        }\n        corners_pair = corners_pair[mode]\n        assert len(corners_pair) == len(point2line_matching) == len(\n            center_axises)\n        for line_select, center_axis, pair_index in zip(\n                point2line_matching, center_axises, corners_pair):\n            if line_select.sum() > self.train_cfg['num_point_line']:\n                point_mask[indices[line_select]] = 1.0\n\n                if with_yaw:\n                    line_center = (corners[pair_index[0]] +\n                                   corners[pair_index[1]]) / 2\n                else:\n                    line_center = coords[line_select].mean(dim=0)\n                    line_center[center_axis] = corners[:, center_axis].mean()\n\n                point_offset[indices[line_select]] = \\\n                    line_center - coords[line_select]\n                point_sem[indices[line_select]] = \\\n                    point_sem.new_tensor([line_center[0], line_center[1],\n                                          line_center[2], cls_label])\n        return point_mask, point_offset, point_sem\n\n    def _assign_primitive_surface_targets(self,\n                                          point_mask,\n                                          point_offset,\n                                          point_sem,\n                                          coords,\n                                          indices,\n                                          cls_label,\n                                          corners,\n                                          with_yaw,\n                                          mode='bottom'):\n        \"\"\"Generate targets for primitive z and primitive xy.\n\n        Args:\n            point_mask (torch.Tensor): Tensor to store the ground\n                truth of mask.\n            point_offset (torch.Tensor): Tensor to store the ground\n                truth of offset.\n            point_sem (torch.Tensor): Tensor to store the ground\n                truth of semantic.\n            coords (torch.Tensor): The selected points.\n            indices (torch.Tensor): Indices of the selected points.\n            cls_label (int): Class label of the ground truth bounding box.\n            corners (torch.Tensor): Corners of the ground truth bounding box.\n            with_yaw (Bool): Whether the boundind box is with rotation.\n            mode (str, optional): Specify which line should be matched,\n                available mode are ('bottom', 'top', 'left', 'right',\n                'front', 'back').\n                Defaults to 'bottom'.\n\n        Returns:\n            Tuple: Targets of the center primitive.\n        \"\"\"\n        point_mask[indices] = 1.0\n        corners_pair = {\n            'bottom': [0, 7],\n            'top': [1, 6],\n            'left': [0, 1],\n            'right': [4, 5],\n            'front': [0, 1],\n            'back': [3, 2]\n        }\n        pair_index = corners_pair[mode]\n        if self.primitive_mode == 'z':\n            if with_yaw:\n                center = (corners[pair_index[0]] +\n                          corners[pair_index[1]]) / 2.0\n                center[2] = coords[:, 2].mean()\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1],\n                    center[2], (corners[4] - 
corners[0]).norm(),\n                    (corners[3] - corners[0]).norm(), cls_label\n                ])\n            else:\n                center = point_mask.new_tensor([\n                    corners[:, 0].mean(), corners[:, 1].mean(),\n                    coords[:, 2].mean()\n                ])\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1], center[2],\n                    corners[:, 0].max() - corners[:, 0].min(),\n                    corners[:, 1].max() - corners[:, 1].min(), cls_label\n                ])\n        elif self.primitive_mode == 'xy':\n            if with_yaw:\n                center = coords.mean(0)\n                center[2] = (corners[pair_index[0], 2] +\n                             corners[pair_index[1], 2]) / 2.0\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1], center[2],\n                    corners[pair_index[1], 2] - corners[pair_index[0], 2],\n                    cls_label\n                ])\n            else:\n                center = point_mask.new_tensor([\n                    coords[:, 0].mean(), coords[:, 1].mean(),\n                    corners[:, 2].mean()\n                ])\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1], center[2],\n                    corners[:, 2].max() - corners[:, 2].min(), cls_label\n                ])\n        point_offset[indices] = center - coords\n        return point_mask, point_offset, point_sem\n\n    def _get_plane_fomulation(self, vector1, vector2, point):\n        \"\"\"Compute the equation of the plane.\n\n        Args:\n            vector1 (torch.Tensor): Parallel vector of the plane.\n            vector2 (torch.Tensor): Parallel vector of the plane.\n            point (torch.Tensor): Point on the plane.\n\n        Returns:\n            torch.Tensor: Equation of the plane.\n        \"\"\"\n        surface_norm = torch.cross(vector1, vector2)\n        surface_dis = -torch.dot(surface_norm, point)\n        plane = point.new_tensor(\n            [surface_norm[0], surface_norm[1], surface_norm[2], surface_dis])\n        return plane\n"
  },
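  {
    "path": "docs/sketches/plane_formulation_sketch.py",
    "content": "\"\"\"Editorial sketch (hypothetical file, not part of the original codebase).\n\nMinimal worked example of the plane parameterization used by the primitive\nhead's ``_get_plane_fomulation``: given two in-plane vectors and a point on\nthe plane, the plane is stored as [n_x, n_y, n_z, d] with n = v1 x v2 and\nd = -n . p, so every point x on the plane satisfies n . x + d = 0.\n\"\"\"\nimport torch\n\n\ndef plane_from_vectors(vector1, vector2, point):\n    \"\"\"Return [n_x, n_y, n_z, d] for the plane spanned by the two vectors.\"\"\"\n    normal = torch.cross(vector1, vector2, dim=0)\n    dis = -torch.dot(normal, point)\n    return torch.cat([normal, dis.view(1)])\n\n\nif __name__ == '__main__':\n    # Plane z = 1: spanned by the x and y axis directions, through (0, 0, 1).\n    v1 = torch.tensor([1.0, 0.0, 0.0])\n    v2 = torch.tensor([0.0, 1.0, 0.0])\n    p = torch.tensor([0.0, 0.0, 1.0])\n    plane = plane_from_vectors(v1, v2, p)  # -> [0., 0., 1., -1.]\n    # Any point with z == 1 satisfies n . x + d == 0.\n    x = torch.tensor([3.0, -2.0, 1.0])\n    residual = torch.dot(plane[:3], x) + plane[3]\n    assert torch.isclose(residual, torch.tensor(0.0))\n    print(plane, residual)\n"
  },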
  {
    "path": "mmdet3d/models/roi_heads/part_aggregation_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import AssignResult\nfrom mmdet3d.core.bbox import bbox3d2result, bbox3d2roi\nfrom mmdet.core import build_assigner, build_sampler\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom .base_3droi_head import Base3DRoIHead\n\n\n@HEADS.register_module()\nclass PartAggregationROIHead(Base3DRoIHead):\n    \"\"\"Part aggregation roi head for PartA2.\n\n    Args:\n        semantic_head (ConfigDict): Config of semantic head.\n        num_classes (int): The number of classes.\n        seg_roi_extractor (ConfigDict): Config of seg_roi_extractor.\n        part_roi_extractor (ConfigDict): Config of part_roi_extractor.\n        bbox_head (ConfigDict): Config of bbox_head.\n        train_cfg (ConfigDict): Training config.\n        test_cfg (ConfigDict): Testing config.\n    \"\"\"\n\n    def __init__(self,\n                 semantic_head,\n                 num_classes=3,\n                 seg_roi_extractor=None,\n                 part_roi_extractor=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(PartAggregationROIHead, self).__init__(\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg)\n        self.num_classes = num_classes\n        assert semantic_head is not None\n        self.semantic_head = build_head(semantic_head)\n\n        if seg_roi_extractor is not None:\n            self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)\n        if part_roi_extractor is not None:\n            self.part_roi_extractor = build_roi_extractor(part_roi_extractor)\n\n        self.init_assigner_sampler()\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be setting at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is a deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n\n    def init_mask_head(self):\n        \"\"\"Initialize mask head, skip since ``PartAggregationROIHead`` does not\n        have one.\"\"\"\n        pass\n\n    def init_bbox_head(self, bbox_head):\n        \"\"\"Initialize box head.\"\"\"\n        self.bbox_head = build_head(bbox_head)\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        self.bbox_assigner = None\n        self.bbox_sampler = None\n        if self.train_cfg:\n            if isinstance(self.train_cfg.assigner, dict):\n                self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n            elif isinstance(self.train_cfg.assigner, list):\n                self.bbox_assigner = [\n                    build_assigner(res) for res in self.train_cfg.assigner\n                ]\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\n\n    @property\n    def with_semantic(self):\n        \"\"\"bool: whether the head has semantic branch\"\"\"\n        return hasattr(self,\n                       'semantic_head') and self.semantic_head is not None\n\n    def forward_train(self, feats_dict, voxels_dict, img_metas, proposal_list,\n                      gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Training forward function 
of PartAggregationROIHead.\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            voxels_dict (dict): Contains information of voxels.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n                The dictionary should contain the following keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes\n                - labels_3d (torch.Tensor): Labels of proposals\n                - cls_preds (torch.Tensor): Original scores of proposals\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):\n                GT bboxes of each sample. The bboxes are encapsulated\n                by 3D box structures.\n            gt_labels_3d (list[LongTensor]): GT labels of each sample.\n\n        Returns:\n            dict: losses from each head.\n\n                - loss_semantic (torch.Tensor): loss of semantic head\n                - loss_bbox (torch.Tensor): loss of bboxes\n        \"\"\"\n        losses = dict()\n        if self.with_semantic:\n            semantic_results = self._semantic_forward_train(\n                feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,\n                gt_labels_3d)\n            losses.update(semantic_results['loss_semantic'])\n\n        sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,\n                                                 gt_labels_3d)\n        if self.with_bbox:\n            bbox_results = self._bbox_forward_train(\n                feats_dict['seg_features'], semantic_results['part_feats'],\n                voxels_dict, sample_results)\n            losses.update(bbox_results['loss_bbox'])\n\n        return losses\n\n    def simple_test(self, feats_dict, voxels_dict, img_metas, proposal_list,\n                    **kwargs):\n        \"\"\"Simple testing forward function of PartAggregationROIHead.\n\n        Note:\n            This function assumes that the batch size is 1\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            voxels_dict (dict): Contains information of voxels.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n\n        Returns:\n            dict: Bbox results of one frame.\n        \"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        assert self.with_semantic\n\n        semantic_results = self.semantic_head(feats_dict['seg_features'])\n\n        rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])\n        labels_3d = [res['labels_3d'] for res in proposal_list]\n        cls_preds = [res['cls_preds'] for res in proposal_list]\n        bbox_results = self._bbox_forward(feats_dict['seg_features'],\n                                          semantic_results['part_feats'],\n                                          voxels_dict, rois)\n\n        bbox_list = self.bbox_head.get_bboxes(\n            rois,\n            bbox_results['cls_score'],\n            bbox_results['bbox_pred'],\n            labels_3d,\n            cls_preds,\n            img_metas,\n            cfg=self.test_cfg)\n\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,\n                            sampling_results):\n        
\"\"\"Forward training function of roi_extractor and bbox_head.\n\n        Args:\n            seg_feats (torch.Tensor): Point-wise semantic features.\n            part_feats (torch.Tensor): Point-wise part prediction features.\n            voxels_dict (dict): Contains information of voxels.\n            sampling_results (:obj:`SamplingResult`): Sampled results used\n                for training.\n\n        Returns:\n            dict: Forward results including losses and predictions.\n        \"\"\"\n        rois = bbox3d2roi([res.bboxes for res in sampling_results])\n        bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,\n                                          rois)\n\n        bbox_targets = self.bbox_head.get_targets(sampling_results,\n                                                  self.train_cfg)\n        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],\n                                        bbox_results['bbox_pred'], rois,\n                                        *bbox_targets)\n\n        bbox_results.update(loss_bbox=loss_bbox)\n        return bbox_results\n\n    def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):\n        \"\"\"Forward function of roi_extractor and bbox_head used in both\n        training and testing.\n\n        Args:\n            seg_feats (torch.Tensor): Point-wise semantic features.\n            part_feats (torch.Tensor): Point-wise part prediction features.\n            voxels_dict (dict): Contains information of voxels.\n            rois (Tensor): Roi boxes.\n\n        Returns:\n            dict: Contains predictions of bbox_head and\n                features of roi_extractor.\n        \"\"\"\n        pooled_seg_feats = self.seg_roi_extractor(seg_feats,\n                                                  voxels_dict['voxel_centers'],\n                                                  voxels_dict['coors'][..., 0],\n                                                  rois)\n        pooled_part_feats = self.part_roi_extractor(\n            part_feats, voxels_dict['voxel_centers'],\n            voxels_dict['coors'][..., 0], rois)\n        cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,\n                                              pooled_part_feats)\n\n        bbox_results = dict(\n            cls_score=cls_score,\n            bbox_pred=bbox_pred,\n            pooled_seg_feats=pooled_seg_feats,\n            pooled_part_feats=pooled_part_feats)\n        return bbox_results\n\n    def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Assign and sample proposals for training.\n\n        Args:\n            proposal_list (list[dict]): Proposals produced by RPN.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels\n\n        Returns:\n            list[:obj:`SamplingResult`]: Sampled results of each training\n                sample.\n        \"\"\"\n        sampling_results = []\n        # bbox assign\n        for batch_idx in range(len(proposal_list)):\n            cur_proposal_list = proposal_list[batch_idx]\n            cur_boxes = cur_proposal_list['boxes_3d']\n            cur_labels_3d = cur_proposal_list['labels_3d']\n            cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)\n            cur_gt_labels = gt_labels_3d[batch_idx]\n\n            batch_num_gts = 0\n            # 0 is bg\n            batch_gt_indis = 
cur_gt_labels.new_full((len(cur_boxes), ), 0)\n            batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))\n            # -1 is bg\n            batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)\n\n            # each class may have its own assigner\n            if isinstance(self.bbox_assigner, list):\n                for i, assigner in enumerate(self.bbox_assigner):\n                    gt_per_cls = (cur_gt_labels == i)\n                    pred_per_cls = (cur_labels_3d == i)\n                    cur_assign_res = assigner.assign(\n                        cur_boxes.tensor[pred_per_cls],\n                        cur_gt_bboxes.tensor[gt_per_cls],\n                        gt_labels=cur_gt_labels[gt_per_cls])\n                    # gather assign_results in different class into one result\n                    batch_num_gts += cur_assign_res.num_gts\n                    # gt inds (1-based)\n                    gt_inds_arange_pad = gt_per_cls.nonzero(\n                        as_tuple=False).view(-1) + 1\n                    # pad 0 for indice unassigned\n                    gt_inds_arange_pad = F.pad(\n                        gt_inds_arange_pad, (1, 0), mode='constant', value=0)\n                    # pad -1 for indice ignore\n                    gt_inds_arange_pad = F.pad(\n                        gt_inds_arange_pad, (1, 0), mode='constant', value=-1)\n                    # convert to 0~gt_num+2 for indices\n                    gt_inds_arange_pad += 1\n                    # now 0 is bg, >1 is fg in batch_gt_indis\n                    batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[\n                        cur_assign_res.gt_inds + 1] - 1\n                    batch_max_overlaps[\n                        pred_per_cls] = cur_assign_res.max_overlaps\n                    batch_gt_labels[pred_per_cls] = cur_assign_res.labels\n\n                assign_result = AssignResult(batch_num_gts, batch_gt_indis,\n                                             batch_max_overlaps,\n                                             batch_gt_labels)\n            else:  # for single class\n                assign_result = self.bbox_assigner.assign(\n                    cur_boxes.tensor,\n                    cur_gt_bboxes.tensor,\n                    gt_labels=cur_gt_labels)\n            # sample boxes\n            sampling_result = self.bbox_sampler.sample(assign_result,\n                                                       cur_boxes.tensor,\n                                                       cur_gt_bboxes.tensor,\n                                                       cur_gt_labels)\n            sampling_results.append(sampling_result)\n        return sampling_results\n\n    def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,\n                                gt_labels_3d):\n        \"\"\"Train semantic head.\n\n        Args:\n            x (torch.Tensor): Point-wise semantic features for segmentation\n            voxels_dict (dict): Contains information of voxels.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels\n\n        Returns:\n            dict: Segmentation results including losses\n        \"\"\"\n        semantic_results = self.semantic_head(x)\n        semantic_targets = self.semantic_head.get_targets(\n            voxels_dict, gt_bboxes_3d, gt_labels_3d)\n        loss_semantic = self.semantic_head.loss(semantic_results,\n                           
                     semantic_targets)\n        semantic_results.update(loss_semantic=loss_semantic)\n        return semantic_results\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/point_rcnn_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import AssignResult\nfrom mmdet3d.core.bbox import bbox3d2result, bbox3d2roi\nfrom mmdet.core import build_assigner, build_sampler\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom .base_3droi_head import Base3DRoIHead\n\n\n@HEADS.register_module()\nclass PointRCNNRoIHead(Base3DRoIHead):\n    \"\"\"RoI head for PointRCNN.\n\n    Args:\n        bbox_head (dict): Config of bbox_head.\n        point_roi_extractor (dict): Config of RoI extractor.\n        train_cfg (dict): Train configs.\n        test_cfg (dict): Test configs.\n        depth_normalizer (float, optional): Normalize depth feature.\n            Defaults to 70.0.\n        init_cfg (dict, optional): Config of initialization. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 bbox_head,\n                 point_roi_extractor,\n                 train_cfg,\n                 test_cfg,\n                 depth_normalizer=70.0,\n                 pretrained=None,\n                 init_cfg=None):\n        super(PointRCNNRoIHead, self).__init__(\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n        self.depth_normalizer = depth_normalizer\n\n        if point_roi_extractor is not None:\n            self.point_roi_extractor = build_roi_extractor(point_roi_extractor)\n\n        self.init_assigner_sampler()\n\n    def init_bbox_head(self, bbox_head):\n        \"\"\"Initialize box head.\n\n        Args:\n            bbox_head (dict): Config dict of RoI Head.\n        \"\"\"\n        self.bbox_head = build_head(bbox_head)\n\n    def init_mask_head(self):\n        \"\"\"Initialize maek head.\"\"\"\n        pass\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        self.bbox_assigner = None\n        self.bbox_sampler = None\n        if self.train_cfg:\n            if isinstance(self.train_cfg.assigner, dict):\n                self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n            elif isinstance(self.train_cfg.assigner, list):\n                self.bbox_assigner = [\n                    build_assigner(res) for res in self.train_cfg.assigner\n                ]\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\n\n    def forward_train(self, feats_dict, input_metas, proposal_list,\n                      gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Training forward function of PointRCNNRoIHead.\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            imput_metas (list[dict]): Meta info of each input.\n            proposal_list (list[dict]): Proposal information from rpn.\n                The dictionary should contain the following keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes\n                - labels_3d (torch.Tensor): Labels of proposals\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):\n                GT bboxes of each sample. 
The bboxes are encapsulated\n                by 3D box structures.\n            gt_labels_3d (list[LongTensor]): GT labels of each sample.\n\n        Returns:\n            dict: Losses from RoI RCNN head.\n                - loss_bbox (torch.Tensor): Loss of bboxes\n        \"\"\"\n        features = feats_dict['features']\n        points = feats_dict['points']\n        point_cls_preds = feats_dict['points_cls_preds']\n        sem_scores = point_cls_preds.sigmoid()\n        point_scores = sem_scores.max(-1)[0]\n\n        sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,\n                                                 gt_labels_3d)\n\n        # concat the depth, semantic features and backbone features\n        features = features.transpose(1, 2).contiguous()\n        point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5\n        features_list = [\n            point_scores.unsqueeze(2),\n            point_depths.unsqueeze(2), features\n        ]\n        features = torch.cat(features_list, dim=2)\n\n        bbox_results = self._bbox_forward_train(features, points,\n                                                sample_results)\n        losses = dict()\n        losses.update(bbox_results['loss_bbox'])\n\n        return losses\n\n    def simple_test(self, feats_dict, img_metas, proposal_list, **kwargs):\n        \"\"\"Simple testing forward function of PointRCNNRoIHead.\n\n        Note:\n            This function assumes that the batch size is 1\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n\n        Returns:\n            dict: Bbox results of one frame.\n        \"\"\"\n        rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])\n        labels_3d = [res['labels_3d'] for res in proposal_list]\n\n        features = feats_dict['features']\n        points = feats_dict['points']\n        point_cls_preds = feats_dict['points_cls_preds']\n        sem_scores = point_cls_preds.sigmoid()\n        point_scores = sem_scores.max(-1)[0]\n\n        features = features.transpose(1, 2).contiguous()\n        point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5\n        features_list = [\n            point_scores.unsqueeze(2),\n            point_depths.unsqueeze(2), features\n        ]\n\n        features = torch.cat(features_list, dim=2)\n        batch_size = features.shape[0]\n        bbox_results = self._bbox_forward(features, points, batch_size, rois)\n        object_score = bbox_results['cls_score'].sigmoid()\n        bbox_list = self.bbox_head.get_bboxes(\n            rois,\n            object_score,\n            bbox_results['bbox_pred'],\n            labels_3d,\n            img_metas,\n            cfg=self.test_cfg)\n\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def _bbox_forward_train(self, features, points, sampling_results):\n        \"\"\"Forward training function of roi_extractor and bbox_head.\n\n        Args:\n            features (torch.Tensor): Backbone features with depth and \\\n                semantic features.\n            points (torch.Tensor): Pointcloud.\n            sampling_results (:obj:`SamplingResult`): Sampled results used\n                for training.\n\n        Returns:\n            dict: Forward results including 
losses and predictions.\n        \"\"\"\n        rois = bbox3d2roi([res.bboxes for res in sampling_results])\n        batch_size = features.shape[0]\n        bbox_results = self._bbox_forward(features, points, batch_size, rois)\n        bbox_targets = self.bbox_head.get_targets(sampling_results,\n                                                  self.train_cfg)\n\n        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],\n                                        bbox_results['bbox_pred'], rois,\n                                        *bbox_targets)\n\n        bbox_results.update(loss_bbox=loss_bbox)\n        return bbox_results\n\n    def _bbox_forward(self, features, points, batch_size, rois):\n        \"\"\"Forward function of roi_extractor and bbox_head used in both\n        training and testing.\n\n        Args:\n            features (torch.Tensor): Backbone features with depth and\n                semantic features.\n            points (torch.Tensor): Pointcloud.\n            batch_size (int): Batch size.\n            rois (torch.Tensor): RoI boxes.\n\n        Returns:\n            dict: Contains predictions of bbox_head and\n                features of roi_extractor.\n        \"\"\"\n        pooled_point_feats = self.point_roi_extractor(features, points,\n                                                      batch_size, rois)\n\n        cls_score, bbox_pred = self.bbox_head(pooled_point_feats)\n        bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)\n        return bbox_results\n\n    def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Assign and sample proposals for training.\n\n        Args:\n            proposal_list (list[dict]): Proposals produced by RPN.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels\n\n        Returns:\n            list[:obj:`SamplingResult`]: Sampled results of each training\n                sample.\n        \"\"\"\n        sampling_results = []\n        # bbox assign\n        for batch_idx in range(len(proposal_list)):\n            cur_proposal_list = proposal_list[batch_idx]\n            cur_boxes = cur_proposal_list['boxes_3d']\n            cur_labels_3d = cur_proposal_list['labels_3d']\n            cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)\n            cur_gt_labels = gt_labels_3d[batch_idx]\n            batch_num_gts = 0\n            # 0 is bg\n            batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)\n            batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))\n            # -1 is bg\n            batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)\n\n            # each class may have its own assigner\n            if isinstance(self.bbox_assigner, list):\n                for i, assigner in enumerate(self.bbox_assigner):\n                    gt_per_cls = (cur_gt_labels == i)\n                    pred_per_cls = (cur_labels_3d == i)\n                    cur_assign_res = assigner.assign(\n                        cur_boxes.tensor[pred_per_cls],\n                        cur_gt_bboxes.tensor[gt_per_cls],\n                        gt_labels=cur_gt_labels[gt_per_cls])\n                    # gather assign_results in different class into one result\n                    batch_num_gts += cur_assign_res.num_gts\n                    # gt inds (1-based)\n                    gt_inds_arange_pad = gt_per_cls.nonzero(\n                 
       as_tuple=False).view(-1) + 1\n                    # pad 0 for indice unassigned\n                    gt_inds_arange_pad = F.pad(\n                        gt_inds_arange_pad, (1, 0), mode='constant', value=0)\n                    # pad -1 for indice ignore\n                    gt_inds_arange_pad = F.pad(\n                        gt_inds_arange_pad, (1, 0), mode='constant', value=-1)\n                    # convert to 0~gt_num+2 for indices\n                    gt_inds_arange_pad += 1\n                    # now 0 is bg, >1 is fg in batch_gt_indis\n                    batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[\n                        cur_assign_res.gt_inds + 1] - 1\n                    batch_max_overlaps[\n                        pred_per_cls] = cur_assign_res.max_overlaps\n                    batch_gt_labels[pred_per_cls] = cur_assign_res.labels\n\n                assign_result = AssignResult(batch_num_gts, batch_gt_indis,\n                                             batch_max_overlaps,\n                                             batch_gt_labels)\n            else:  # for single class\n                assign_result = self.bbox_assigner.assign(\n                    cur_boxes.tensor,\n                    cur_gt_bboxes.tensor,\n                    gt_labels=cur_gt_labels)\n\n            # sample boxes\n            sampling_result = self.bbox_sampler.sample(assign_result,\n                                                       cur_boxes.tensor,\n                                                       cur_gt_bboxes.tensor,\n                                                       cur_gt_labels)\n            sampling_results.append(sampling_result)\n        return sampling_results\n"
  },
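  {
    "path": "docs/sketches/per_class_assign_index_sketch.py",
    "content": "\"\"\"Editorial sketch (hypothetical file, not part of the original codebase).\n\nWalks through the index bookkeeping used by ``_assign_and_sample`` in the\nPartA2 and PointRCNN RoI heads when each class has its own assigner: the\nper-class assigner returns gt indices that are 1-based within that class's\ngt subset, and the padded lookup table below maps them to 1-based indices\ninto the full gt list while keeping 0 (background) and -1 (ignore) intact.\n\"\"\"\nimport torch\nfrom torch.nn import functional as F\n\n\ndef to_global_gt_inds(per_cls_gt_inds, gt_per_cls):\n    \"\"\"Map per-class assigner gt indices to indices into the full gt list.\"\"\"\n    # 1-based global indices of the ground truths belonging to this class.\n    table = gt_per_cls.nonzero(as_tuple=False).view(-1) + 1\n    # Prepend entries for unassigned (0) and ignore (-1), then shift by 1\n    # so the table can be indexed with (per-class gt index + 1).\n    table = F.pad(table, (1, 0), mode='constant', value=0)\n    table = F.pad(table, (1, 0), mode='constant', value=-1)\n    table = table + 1\n    return table[per_cls_gt_inds + 1] - 1\n\n\nif __name__ == '__main__':\n    # Five ground truths in the sample, with classes [car, ped, car, cyc, car].\n    cur_gt_labels = torch.tensor([0, 1, 0, 2, 0])\n    gt_per_cls = cur_gt_labels == 0  # the three car ground truths\n    # A car-only assigner saw just those three gts; for four car proposals it\n    # returned: ignore, background, 1st car gt, 3rd car gt.\n    per_cls_gt_inds = torch.tensor([-1, 0, 1, 3])\n    print(to_global_gt_inds(per_cls_gt_inds, gt_per_cls))\n    # -> tensor([-1,  0,  1,  5]): the 3rd car gt is the 5th gt overall.\n"
  },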
  {
    "path": "mmdet3d/models/roi_heads/roi_extractors/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor\nfrom .single_roiaware_extractor import Single3DRoIAwareExtractor\nfrom .single_roipoint_extractor import Single3DRoIPointExtractor\n\n__all__ = [\n    'SingleRoIExtractor', 'Single3DRoIAwareExtractor',\n    'Single3DRoIPointExtractor'\n]\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv import ops\nfrom mmcv.runner import BaseModule\n\nfrom mmdet3d.models.builder import ROI_EXTRACTORS\n\n\n@ROI_EXTRACTORS.register_module()\nclass Single3DRoIAwareExtractor(BaseModule):\n    \"\"\"Point-wise roi-aware Extractor.\n\n    Extract Point-wise roi features.\n\n    Args:\n        roi_layer (dict): The config of roi layer.\n    \"\"\"\n\n    def __init__(self, roi_layer=None, init_cfg=None):\n        super(Single3DRoIAwareExtractor, self).__init__(init_cfg=init_cfg)\n        self.roi_layer = self.build_roi_layers(roi_layer)\n\n    def build_roi_layers(self, layer_cfg):\n        \"\"\"Build roi layers using `layer_cfg`\"\"\"\n        cfg = layer_cfg.copy()\n        layer_type = cfg.pop('type')\n        assert hasattr(ops, layer_type)\n        layer_cls = getattr(ops, layer_type)\n        roi_layers = layer_cls(**cfg)\n        return roi_layers\n\n    def forward(self, feats, coordinate, batch_inds, rois):\n        \"\"\"Extract point-wise roi features.\n\n        Args:\n            feats (torch.FloatTensor): Point-wise features with\n                shape (batch, npoints, channels) for pooling.\n            coordinate (torch.FloatTensor): Coordinate of each point.\n            batch_inds (torch.LongTensor): Indicate the batch of each point.\n            rois (torch.FloatTensor): Roi boxes with batch indices.\n\n        Returns:\n            torch.FloatTensor: Pooled features\n        \"\"\"\n        pooled_roi_feats = []\n        for batch_idx in range(int(batch_inds.max()) + 1):\n            roi_inds = (rois[..., 0].int() == batch_idx)\n            coors_inds = (batch_inds.int() == batch_idx)\n            pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],\n                                             coordinate[coors_inds],\n                                             feats[coors_inds])\n            pooled_roi_feats.append(pooled_roi_feat)\n        pooled_roi_feats = torch.cat(pooled_roi_feats, 0)\n        return pooled_roi_feats\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/roi_extractors/single_roipoint_extractor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv import ops\nfrom torch import nn as nn\n\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\nfrom mmdet3d.models.builder import ROI_EXTRACTORS\n\n\n@ROI_EXTRACTORS.register_module()\nclass Single3DRoIPointExtractor(nn.Module):\n    \"\"\"Point-wise roi-aware Extractor.\n\n    Extract Point-wise roi features.\n\n    Args:\n        roi_layer (dict): The config of roi layer.\n    \"\"\"\n\n    def __init__(self, roi_layer=None):\n        super(Single3DRoIPointExtractor, self).__init__()\n        self.roi_layer = self.build_roi_layers(roi_layer)\n\n    def build_roi_layers(self, layer_cfg):\n        \"\"\"Build roi layers using `layer_cfg`\"\"\"\n        cfg = layer_cfg.copy()\n        layer_type = cfg.pop('type')\n        assert hasattr(ops, layer_type)\n        layer_cls = getattr(ops, layer_type)\n        roi_layers = layer_cls(**cfg)\n        return roi_layers\n\n    def forward(self, feats, coordinate, batch_inds, rois):\n        \"\"\"Extract point-wise roi features.\n\n        Args:\n            feats (torch.FloatTensor): Point-wise features with\n                shape (batch, npoints, channels) for pooling.\n            coordinate (torch.FloatTensor): Coordinate of each point.\n            batch_inds (torch.LongTensor): Indicate the batch of each point.\n            rois (torch.FloatTensor): Roi boxes with batch indices.\n\n        Returns:\n            torch.FloatTensor: Pooled features\n        \"\"\"\n        rois = rois[..., 1:]\n        rois = rois.view(batch_inds, -1, rois.shape[-1])\n        with torch.no_grad():\n            pooled_roi_feat, pooled_empty_flag = self.roi_layer(\n                coordinate, feats, rois)\n\n            # canonical transformation\n            roi_center = rois[:, :, 0:3]\n            pooled_roi_feat[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2)\n            pooled_roi_feat = pooled_roi_feat.view(-1,\n                                                   pooled_roi_feat.shape[-2],\n                                                   pooled_roi_feat.shape[-1])\n            pooled_roi_feat[:, :, 0:3] = rotation_3d_in_axis(\n                pooled_roi_feat[:, :, 0:3],\n                -(rois.view(-1, rois.shape[-1])[:, 6]),\n                axis=2)\n            pooled_roi_feat[pooled_empty_flag.view(-1) > 0] = 0\n\n        return pooled_roi_feat\n"
  },
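  {
    "path": "docs/sketches/roipoint_canonical_transform_sketch.py",
    "content": "\"\"\"Editorial sketch (hypothetical file, not part of the original codebase).\n\nIllustrates the canonical transformation applied by\n``Single3DRoIPointExtractor``: points pooled inside an RoI are shifted to the\nRoI center and rotated by the negative box yaw, so every RoI sees its points\nin a box-aligned local frame. Rotating back and re-adding the center recovers\nthe original coordinates.\n\"\"\"\nimport torch\n\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\n\n\ndef canonicalize(points, rois):\n    \"\"\"Transform (num_rois, num_pts, 3) points into each RoI's local frame.\n\n    ``rois`` are (num_rois, 7) boxes given as (x, y, z, dx, dy, dz, yaw).\n    \"\"\"\n    local = points - rois[:, None, 0:3]\n    return rotation_3d_in_axis(local, -rois[:, 6], axis=2)\n\n\nif __name__ == '__main__':\n    rois = torch.tensor([[2.0, 1.0, 0.5, 4.0, 2.0, 1.5, 0.3]])\n    points = torch.tensor([[[3.0, 1.5, 0.8], [1.0, 0.5, 0.2]]])\n    local = canonicalize(points, rois)\n    # Undo the transform: rotate by +yaw, then add the RoI center back.\n    center = rois[:, None, 0:3]\n    recovered = rotation_3d_in_axis(local, rois[:, 6], axis=2) + center\n    assert torch.allclose(recovered, points, atol=1e-5)\n    print(local)\n"
  },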
  {
    "path": "mmdet3d/models/segmentors/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base import Base3DSegmentor\nfrom .encoder_decoder import EncoderDecoder3D\n\n__all__ = ['Base3DSegmentor', 'EncoderDecoder3D']\n"
  },
  {
    "path": "mmdet3d/models/segmentors/base.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.parallel import DataContainer as DC\nfrom mmcv.runner import auto_fp16\n\nfrom mmdet3d.core import show_seg_result\nfrom mmseg.models.segmentors import BaseSegmentor\n\n\nclass Base3DSegmentor(BaseSegmentor):\n    \"\"\"Base class for 3D segmentors.\n\n    The main difference with `BaseSegmentor` is that we modify the keys in\n    data_dict and use a 3D seg specific visualization function.\n    \"\"\"\n\n    @property\n    def with_regularization_loss(self):\n        \"\"\"bool: whether the segmentor has regularization loss for weight\"\"\"\n        return hasattr(self, 'loss_regularization') and \\\n            self.loss_regularization is not None\n\n    def forward_test(self, points, img_metas, **kwargs):\n        \"\"\"Calls either simple_test or aug_test depending on the length of\n        outer list of points. If len(points) == 1, call simple_test. Otherwise\n        call aug_test to aggregate the test results by e.g. voting.\n\n        Args:\n            points (list[list[torch.Tensor]]): the outer list indicates\n                test-time augmentations and inner torch.Tensor should have a\n                shape BXNxC, which contains all points in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch.\n        \"\"\"\n        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError(f'{name} must be a list, but got {type(var)}')\n\n        num_augs = len(points)\n        if num_augs != len(img_metas):\n            raise ValueError(f'num of augmentations ({len(points)}) != '\n                             f'num of image meta ({len(img_metas)})')\n\n        if num_augs == 1:\n            return self.simple_test(points[0], img_metas[0], **kwargs)\n        else:\n            return self.aug_test(points, img_metas, **kwargs)\n\n    @auto_fp16(apply_to=('points'))\n    def forward(self, return_loss=True, **kwargs):\n        \"\"\"Calls either forward_train or forward_test depending on whether\n        return_loss=True.\n\n        Note this setting will change the expected inputs. When\n        `return_loss=True`, point and img_metas are single-nested (i.e.\n        torch.Tensor and list[dict]), and when `resturn_loss=False`, point and\n        img_metas should be double nested (i.e.  list[torch.Tensor],\n        list[list[dict]]), with the outer list indicating test time\n        augmentations.\n        \"\"\"\n        if return_loss:\n            return self.forward_train(**kwargs)\n        else:\n            return self.forward_test(**kwargs)\n\n    def show_results(self,\n                     data,\n                     result,\n                     palette=None,\n                     out_dir=None,\n                     ignore_index=None,\n                     show=False,\n                     score_thr=None):\n        \"\"\"Results visualization.\n\n        Args:\n            data (list[dict]): Input points and the information of the sample.\n            result (list[dict]): Prediction results.\n            palette (list[list[int]]] | np.ndarray): The palette of\n                segmentation map. If None is given, random palette will be\n                generated. 
Default: None\n            out_dir (str): Output directory of visualization result.\n            ignore_index (int, optional): The label index to be ignored, e.g.\n                unannotated points. If None is given, set to len(self.CLASSES).\n                Defaults to None.\n            show (bool, optional): Whether to show the results with open3d.\n                Defaults to False.\n            TODO: implement score_thr of Base3DSegmentor.\n            score_thr (float, optional): Score threshold of bounding boxes.\n                Defaults to None.\n                Not implemented yet, but it is here for unification.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        if palette is None:\n            if self.PALETTE is None:\n                palette = np.random.randint(\n                    0, 255, size=(len(self.CLASSES), 3))\n            else:\n                palette = self.PALETTE\n        palette = np.array(palette)\n        for batch_id in range(len(result)):\n            if isinstance(data['points'][0], DC):\n                points = data['points'][0]._data[0][batch_id].numpy()\n            elif mmcv.is_list_of(data['points'][0], torch.Tensor):\n                points = data['points'][0][batch_id]\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['points'][0])} \"\n                    f'for visualization!')\n            if isinstance(data['img_metas'][0], DC):\n                pts_filename = data['img_metas'][0]._data[0][batch_id][\n                    'pts_filename']\n            elif mmcv.is_list_of(data['img_metas'][0], dict):\n                pts_filename = data['img_metas'][0][batch_id]['pts_filename']\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['img_metas'][0])} \"\n                    f'for visualization!')\n            file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n            pred_sem_mask = result[batch_id]['semantic_mask'].cpu().numpy()\n\n            show_seg_result(\n                points,\n                None,\n                pred_sem_mask,\n                out_dir,\n                file_name,\n                palette,\n                ignore_index,\n                show=show)\n"
  },
  {
    "path": "mmdet3d/models/segmentors/encoder_decoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmseg.core import add_prefix\nfrom ..builder import (SEGMENTORS, build_backbone, build_head, build_loss,\n                       build_neck)\nfrom .base import Base3DSegmentor\n\n\n@SEGMENTORS.register_module()\nclass EncoderDecoder3D(Base3DSegmentor):\n    \"\"\"3D Encoder Decoder segmentors.\n\n    EncoderDecoder typically consists of backbone, decode_head, auxiliary_head.\n    Note that auxiliary_head is only used for deep supervision during training,\n    which could be thrown during inference.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 decode_head,\n                 neck=None,\n                 auxiliary_head=None,\n                 loss_regularization=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(EncoderDecoder3D, self).__init__(init_cfg=init_cfg)\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n        self._init_decode_head(decode_head)\n        self._init_auxiliary_head(auxiliary_head)\n        self._init_loss_regularization(loss_regularization)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        assert self.with_decode_head, \\\n            '3D EncoderDecoder Segmentor should have a decode_head'\n\n    def _init_decode_head(self, decode_head):\n        \"\"\"Initialize ``decode_head``\"\"\"\n        self.decode_head = build_head(decode_head)\n        self.num_classes = self.decode_head.num_classes\n\n    def _init_auxiliary_head(self, auxiliary_head):\n        \"\"\"Initialize ``auxiliary_head``\"\"\"\n        if auxiliary_head is not None:\n            if isinstance(auxiliary_head, list):\n                self.auxiliary_head = nn.ModuleList()\n                for head_cfg in auxiliary_head:\n                    self.auxiliary_head.append(build_head(head_cfg))\n            else:\n                self.auxiliary_head = build_head(auxiliary_head)\n\n    def _init_loss_regularization(self, loss_regularization):\n        \"\"\"Initialize ``loss_regularization``\"\"\"\n        if loss_regularization is not None:\n            if isinstance(loss_regularization, list):\n                self.loss_regularization = nn.ModuleList()\n                for loss_cfg in loss_regularization:\n                    self.loss_regularization.append(build_loss(loss_cfg))\n            else:\n                self.loss_regularization = build_loss(loss_regularization)\n\n    def extract_feat(self, points):\n        \"\"\"Extract features from points.\"\"\"\n        x = self.backbone(points)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def encode_decode(self, points, img_metas):\n        \"\"\"Encode points with backbone and decode into a semantic segmentation\n        map of the same size as input.\n\n        Args:\n            points (torch.Tensor): Input points of shape [B, N, 3+C].\n            img_metas (list[dict]): Meta information of each sample.\n\n        Returns:\n            torch.Tensor: Segmentation logits of shape [B, num_classes, N].\n        \"\"\"\n        x = self.extract_feat(points)\n        out = self._decode_head_forward_test(x, img_metas)\n        return out\n\n    def _decode_head_forward_train(self, x, img_metas, 
pts_semantic_mask):\n        \"\"\"Run forward function and calculate loss for decode head in\n        training.\"\"\"\n        losses = dict()\n        loss_decode = self.decode_head.forward_train(x, img_metas,\n                                                     pts_semantic_mask,\n                                                     self.train_cfg)\n\n        losses.update(add_prefix(loss_decode, 'decode'))\n        return losses\n\n    def _decode_head_forward_test(self, x, img_metas):\n        \"\"\"Run forward function and calculate loss for decode head in\n        inference.\"\"\"\n        seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg)\n        return seg_logits\n\n    def _auxiliary_head_forward_train(self, x, img_metas, pts_semantic_mask):\n        \"\"\"Run forward function and calculate loss for auxiliary head in\n        training.\"\"\"\n        losses = dict()\n        if isinstance(self.auxiliary_head, nn.ModuleList):\n            for idx, aux_head in enumerate(self.auxiliary_head):\n                loss_aux = aux_head.forward_train(x, img_metas,\n                                                  pts_semantic_mask,\n                                                  self.train_cfg)\n                losses.update(add_prefix(loss_aux, f'aux_{idx}'))\n        else:\n            loss_aux = self.auxiliary_head.forward_train(\n                x, img_metas, pts_semantic_mask, self.train_cfg)\n            losses.update(add_prefix(loss_aux, 'aux'))\n\n        return losses\n\n    def _loss_regularization_forward_train(self):\n        \"\"\"Calculate regularization loss for model weight in training.\"\"\"\n        losses = dict()\n        if isinstance(self.loss_regularization, nn.ModuleList):\n            for idx, regularize_loss in enumerate(self.loss_regularization):\n                loss_regularize = dict(\n                    loss_regularize=regularize_loss(self.modules()))\n                losses.update(add_prefix(loss_regularize, f'regularize_{idx}'))\n        else:\n            loss_regularize = dict(\n                loss_regularize=self.loss_regularization(self.modules()))\n            losses.update(add_prefix(loss_regularize, 'regularize'))\n\n        return losses\n\n    def forward_dummy(self, points):\n        \"\"\"Dummy forward function.\"\"\"\n        seg_logit = self.encode_decode(points, None)\n\n        return seg_logit\n\n    def forward_train(self, points, img_metas, pts_semantic_mask):\n        \"\"\"Forward function for training.\n\n        Args:\n            points (list[torch.Tensor]): List of points of shape [N, C].\n            img_metas (list): Image metas.\n            pts_semantic_mask (list[torch.Tensor]): List of point-wise semantic\n                labels of shape [N].\n\n        Returns:\n            dict[str, Tensor]: Losses.\n        \"\"\"\n        points_cat = torch.stack(points)\n        pts_semantic_mask_cat = torch.stack(pts_semantic_mask)\n\n        # extract features using backbone\n        x = self.extract_feat(points_cat)\n\n        losses = dict()\n\n        loss_decode = self._decode_head_forward_train(x, img_metas,\n                                                      pts_semantic_mask_cat)\n        losses.update(loss_decode)\n\n        if self.with_auxiliary_head:\n            loss_aux = self._auxiliary_head_forward_train(\n                x, img_metas, pts_semantic_mask_cat)\n            losses.update(loss_aux)\n\n        if self.with_regularization_loss:\n            loss_regularize = 
self._loss_regularization_forward_train()\n            losses.update(loss_regularize)\n\n        return losses\n\n    @staticmethod\n    def _input_generation(coords,\n                          patch_center,\n                          coord_max,\n                          feats,\n                          use_normalized_coord=False):\n        \"\"\"Generating model input.\n\n        Generate input by subtracting patch center and adding additional\n            features. Currently support colors and normalized xyz as features.\n\n        Args:\n            coords (torch.Tensor): Sampled 3D point coordinate of shape [S, 3].\n            patch_center (torch.Tensor): Center coordinate of the patch.\n            coord_max (torch.Tensor): Max coordinate of all 3D points.\n            feats (torch.Tensor): Features of sampled points of shape [S, C].\n            use_normalized_coord (bool, optional): Whether to use normalized\n                xyz as additional features. Defaults to False.\n\n        Returns:\n            torch.Tensor: The generated input data of shape [S, 3+C'].\n        \"\"\"\n        # subtract patch center, the z dimension is not centered\n        centered_coords = coords.clone()\n        centered_coords[:, 0] -= patch_center[0]\n        centered_coords[:, 1] -= patch_center[1]\n\n        # normalized coordinates as extra features\n        if use_normalized_coord:\n            normalized_coord = coords / coord_max\n            feats = torch.cat([feats, normalized_coord], dim=1)\n\n        points = torch.cat([centered_coords, feats], dim=1)\n\n        return points\n\n    def _sliding_patch_generation(self,\n                                  points,\n                                  num_points,\n                                  block_size,\n                                  sample_rate=0.5,\n                                  use_normalized_coord=False,\n                                  eps=1e-3):\n        \"\"\"Sampling points in a sliding window fashion.\n\n        First sample patches to cover all the input points.\n        Then sample points in each patch to batch points of a certain number.\n\n        Args:\n            points (torch.Tensor): Input points of shape [N, 3+C].\n            num_points (int): Number of points to be sampled in each patch.\n            block_size (float, optional): Size of a patch to sample.\n            sample_rate (float, optional): Stride used in sliding patch.\n                Defaults to 0.5.\n            use_normalized_coord (bool, optional): Whether to use normalized\n                xyz as additional features. Defaults to False.\n            eps (float, optional): A value added to patch boundary to guarantee\n                points coverage. 
Defaults to 1e-3.\n\n        Returns:\n            np.ndarray | np.ndarray:\n\n                - patch_points (torch.Tensor): Points of different patches of\n                    shape [K, N, 3+C].\n                - patch_idxs (torch.Tensor): Index of each point in\n                    `patch_points`, of shape [K, N].\n        \"\"\"\n        device = points.device\n        # we assume the first three dims are points' 3D coordinates\n        # and the rest dims are their per-point features\n        coords = points[:, :3]\n        feats = points[:, 3:]\n\n        coord_max = coords.max(0)[0]\n        coord_min = coords.min(0)[0]\n        stride = block_size * sample_rate\n        num_grid_x = int(\n            torch.ceil((coord_max[0] - coord_min[0] - block_size) /\n                       stride).item() + 1)\n        num_grid_y = int(\n            torch.ceil((coord_max[1] - coord_min[1] - block_size) /\n                       stride).item() + 1)\n\n        patch_points, patch_idxs = [], []\n        for idx_y in range(num_grid_y):\n            s_y = coord_min[1] + idx_y * stride\n            e_y = torch.min(s_y + block_size, coord_max[1])\n            s_y = e_y - block_size\n            for idx_x in range(num_grid_x):\n                s_x = coord_min[0] + idx_x * stride\n                e_x = torch.min(s_x + block_size, coord_max[0])\n                s_x = e_x - block_size\n\n                # extract points within this patch\n                cur_min = torch.tensor([s_x, s_y, coord_min[2]]).to(device)\n                cur_max = torch.tensor([e_x, e_y, coord_max[2]]).to(device)\n                cur_choice = ((coords >= cur_min - eps) &\n                              (coords <= cur_max + eps)).all(dim=1)\n\n                if not cur_choice.any():  # no points in this patch\n                    continue\n\n                # sample points in this patch to multiple batches\n                cur_center = cur_min + block_size / 2.0\n                point_idxs = torch.nonzero(cur_choice, as_tuple=True)[0]\n                num_batch = int(np.ceil(point_idxs.shape[0] / num_points))\n                point_size = int(num_batch * num_points)\n                replace = point_size > 2 * point_idxs.shape[0]\n                num_repeat = point_size - point_idxs.shape[0]\n                if replace:  # duplicate\n                    point_idxs_repeat = point_idxs[torch.randint(\n                        0, point_idxs.shape[0],\n                        size=(num_repeat, )).to(device)]\n                else:\n                    point_idxs_repeat = point_idxs[torch.randperm(\n                        point_idxs.shape[0])[:num_repeat]]\n\n                choices = torch.cat([point_idxs, point_idxs_repeat], dim=0)\n                choices = choices[torch.randperm(choices.shape[0])]\n\n                # construct model input\n                point_batches = self._input_generation(\n                    coords[choices],\n                    cur_center,\n                    coord_max,\n                    feats[choices],\n                    use_normalized_coord=use_normalized_coord)\n\n                patch_points.append(point_batches)\n                patch_idxs.append(choices)\n\n        patch_points = torch.cat(patch_points, dim=0)\n        patch_idxs = torch.cat(patch_idxs, dim=0)\n\n        # make sure all points are sampled at least once\n        assert torch.unique(patch_idxs).shape[0] == points.shape[0], \\\n            'some points are not sampled in sliding inference'\n\n        return patch_points, 
patch_idxs\n\n    def slide_inference(self, point, img_meta, rescale):\n        \"\"\"Inference by sliding-window with overlap.\n\n        Args:\n            point (torch.Tensor): Input points of shape [N, 3+C].\n            img_meta (dict): Meta information of input sample.\n            rescale (bool): Whether transform to original number of points.\n                Will be used for voxelization based segmentors.\n\n        Returns:\n            Tensor: The output segmentation map of shape [num_classes, N].\n        \"\"\"\n        num_points = self.test_cfg.num_points\n        block_size = self.test_cfg.block_size\n        sample_rate = self.test_cfg.sample_rate\n        use_normalized_coord = self.test_cfg.use_normalized_coord\n        batch_size = self.test_cfg.batch_size * num_points\n\n        # patch_points is of shape [K*N, 3+C], patch_idxs is of shape [K*N]\n        patch_points, patch_idxs = self._sliding_patch_generation(\n            point, num_points, block_size, sample_rate, use_normalized_coord)\n        feats_dim = patch_points.shape[1]\n        seg_logits = []  # save patch predictions\n\n        for batch_idx in range(0, patch_points.shape[0], batch_size):\n            batch_points = patch_points[batch_idx:batch_idx + batch_size]\n            batch_points = batch_points.view(-1, num_points, feats_dim)\n            # batch_seg_logit is of shape [B, num_classes, N]\n            batch_seg_logit = self.encode_decode(batch_points, img_meta)\n            batch_seg_logit = batch_seg_logit.transpose(1, 2).contiguous()\n            seg_logits.append(batch_seg_logit.view(-1, self.num_classes))\n\n        # aggregate per-point logits by indexing sum and dividing count\n        seg_logits = torch.cat(seg_logits, dim=0)  # [K*N, num_classes]\n        expand_patch_idxs = patch_idxs.unsqueeze(1).repeat(1, self.num_classes)\n        preds = point.new_zeros((point.shape[0], self.num_classes)).\\\n            scatter_add_(dim=0, index=expand_patch_idxs, src=seg_logits)\n        count_mat = torch.bincount(patch_idxs)\n        preds = preds / count_mat[:, None]\n\n        # TODO: if rescale and voxelization segmentor\n\n        return preds.transpose(0, 1)  # to [num_classes, K*N]\n\n    def whole_inference(self, points, img_metas, rescale):\n        \"\"\"Inference with full scene (one forward pass without sliding).\"\"\"\n        seg_logit = self.encode_decode(points, img_metas)\n        # TODO: if rescale and voxelization segmentor\n        return seg_logit\n\n    def inference(self, points, img_metas, rescale):\n        \"\"\"Inference with slide/whole style.\n\n        Args:\n            points (torch.Tensor): Input points of shape [B, N, 3+C].\n            img_metas (list[dict]): Meta information of each sample.\n            rescale (bool): Whether transform to original number of points.\n                Will be used for voxelization based segmentors.\n\n        Returns:\n            Tensor: The output segmentation map.\n        \"\"\"\n        assert self.test_cfg.mode in ['slide', 'whole']\n        if self.test_cfg.mode == 'slide':\n            seg_logit = torch.stack([\n                self.slide_inference(point, img_meta, rescale)\n                for point, img_meta in zip(points, img_metas)\n            ], 0)\n        else:\n            seg_logit = self.whole_inference(points, img_metas, rescale)\n        output = F.softmax(seg_logit, dim=1)\n        return output\n\n    def simple_test(self, points, img_metas, rescale=True):\n        \"\"\"Simple test with single scene.\n\n      
  Args:\n            points (list[torch.Tensor]): List of points of shape [N, 3+C].\n            img_metas (list[dict]): Meta information of each sample.\n            rescale (bool): Whether transform to original number of points.\n                Will be used for voxelization based segmentors.\n                Defaults to True.\n\n        Returns:\n            list[dict]: The output prediction result with following keys:\n\n                - semantic_mask (Tensor): Segmentation mask of shape [N].\n        \"\"\"\n        # 3D segmentation requires per-point prediction, so it's impossible\n        # to use down-sampling to get a batch of scenes with the same\n        # num_points; therefore, we only support testing one scene at a time\n        seg_pred = []\n        for point, img_meta in zip(points, img_metas):\n            seg_prob = self.inference(point.unsqueeze(0), [img_meta],\n                                      rescale)[0]\n            seg_map = seg_prob.argmax(0)  # [N]\n            # to cpu tensor for consistency with det3d\n            seg_map = seg_map.cpu()\n            seg_pred.append(seg_map)\n        # wrap in dict\n        seg_pred = [dict(semantic_mask=seg_map) for seg_map in seg_pred]\n        return seg_pred\n\n    def aug_test(self, points, img_metas, rescale=True):\n        \"\"\"Test with augmentations.\n\n        Args:\n            points (list[torch.Tensor]): List of points of shape [B, N, 3+C].\n            img_metas (list[list[dict]]): Meta information of each sample.\n                The outer list indicates different samples, while the inner\n                list indicates different augmentations.\n            rescale (bool): Whether transform to original number of points.\n                Will be used for voxelization based segmentors.\n                Defaults to True.\n\n        Returns:\n            list[dict]: The output prediction result with following keys:\n\n                - semantic_mask (Tensor): Segmentation mask of shape [N].\n        \"\"\"\n        # in aug_test, one scene going through different augmentations keeps\n        # the same number of points, so the augmented views are stacked as a\n        # batch; to save memory, we average the seg logits over augmentations\n        seg_pred = []\n        for point, img_meta in zip(points, img_metas):\n            seg_prob = self.inference(point, img_meta, rescale)\n            seg_prob = seg_prob.mean(0)  # [num_classes, N]\n            seg_map = seg_prob.argmax(0)  # [N]\n            # to cpu tensor for consistency with det3d\n            seg_map = seg_map.cpu()\n            seg_pred.append(seg_map)\n        # wrap in dict\n        seg_pred = [dict(semantic_mask=seg_map) for seg_map in seg_pred]\n        return seg_pred\n"
  },
  {
    "path": "mmdet3d/models/utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .clip_sigmoid import clip_sigmoid\nfrom .edge_indices import get_edge_indices\nfrom .gen_keypoints import get_keypoints\nfrom .handle_objs import filter_outside_objs, handle_proj_objs\nfrom .mlp import MLP\n\n__all__ = [\n    'clip_sigmoid', 'MLP', 'get_edge_indices', 'filter_outside_objs',\n    'handle_proj_objs', 'get_keypoints'\n]\n"
  },
  {
    "path": "mmdet3d/models/utils/clip_sigmoid.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef clip_sigmoid(x, eps=1e-4):\n    \"\"\"Sigmoid function for input feature.\n\n    Args:\n        x (torch.Tensor): Input feature map with the shape of [B, N, H, W].\n        eps (float, optional): Lower bound of the range to be clamped to.\n            Defaults to 1e-4.\n\n    Returns:\n        torch.Tensor: Feature map after sigmoid.\n    \"\"\"\n    y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps)\n    return y\n"
  },
  {
    "path": "mmdet3d/models/utils/edge_indices.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\n\ndef get_edge_indices(img_metas,\n                     downsample_ratio,\n                     step=1,\n                     pad_mode='default',\n                     dtype=np.float32,\n                     device='cpu'):\n    \"\"\"Function to filter the objects label outside the image.\n    The edge_indices are generated using numpy on cpu rather\n    than on CUDA due to the latency issue. When batch size = 8,\n    this function with numpy array is ~8 times faster than that\n    with CUDA tensor (0.09s and 0.72s in 100 runs).\n\n    Args:\n        img_metas (list[dict]): Meta information of each image, e.g.,\n            image size, scaling factor, etc.\n        downsample_ratio (int): Downsample ratio of output feature,\n        step (int, optional): Step size used for generateing\n            edge indices. Default: 1.\n        pad_mode (str, optional): Padding mode during data pipeline.\n            Default: 'default'.\n        dtype (torch.dtype, optional): Dtype of edge indices tensor.\n            Default: np.float32.\n        device (str, optional): Device of edge indices tensor.\n            Default: 'cpu'.\n\n    Returns:\n        list[Tensor]: Edge indices for each image in batch data.\n    \"\"\"\n    edge_indices_list = []\n    for i in range(len(img_metas)):\n        img_shape = img_metas[i]['img_shape']\n        pad_shape = img_metas[i]['pad_shape']\n        h, w = img_shape[:2]\n        pad_h, pad_w = pad_shape\n        edge_indices = []\n\n        if pad_mode == 'default':\n            x_min = 0\n            y_min = 0\n            x_max = (w - 1) // downsample_ratio\n            y_max = (h - 1) // downsample_ratio\n        elif pad_mode == 'center':\n            x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)\n            y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)\n            x_max = x_min + w // downsample_ratio\n            y_max = y_min + h // downsample_ratio\n        else:\n            raise NotImplementedError\n\n        # left\n        y = np.arange(y_min, y_max, step, dtype=dtype)\n        x = np.ones(len(y)) * x_min\n\n        edge_indices_edge = np.stack((x, y), axis=1)\n        edge_indices.append(edge_indices_edge)\n\n        # bottom\n        x = np.arange(x_min, x_max, step, dtype=dtype)\n        y = np.ones(len(x)) * y_max\n\n        edge_indices_edge = np.stack((x, y), axis=1)\n        edge_indices.append(edge_indices_edge)\n\n        # right\n        y = np.arange(y_max, y_min, -step, dtype=dtype)\n        x = np.ones(len(y)) * x_max\n\n        edge_indices_edge = np.stack((x, y), axis=1)\n        edge_indices.append(edge_indices_edge)\n\n        # top\n        x = np.arange(x_max, x_min, -step, dtype=dtype)\n        y = np.ones(len(x)) * y_min\n\n        edge_indices_edge = np.stack((x, y), axis=1)\n        edge_indices.append(edge_indices_edge)\n\n        edge_indices = \\\n            np.concatenate([index for index in edge_indices], axis=0)\n        edge_indices = torch.from_numpy(edge_indices).to(device).long()\n        edge_indices_list.append(edge_indices)\n\n    return edge_indices_list\n"
  },
  {
    "path": "mmdet3d/models/utils/gen_keypoints.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet3d.core.bbox import points_cam2img\n\n\ndef get_keypoints(gt_bboxes_3d_list,\n                  centers2d_list,\n                  img_metas,\n                  use_local_coords=True):\n    \"\"\"Function to filter the objects label outside the image.\n\n    Args:\n        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n            shape (num_gt, 4).\n        centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,\n            shape (num_gt, 2).\n        img_metas (list[dict]): Meta information of each image, e.g.,\n            image size, scaling factor, etc.\n        use_local_coords (bool, optional): Wheher to use local coordinates\n            for keypoints. Default: True.\n\n    Returns:\n        tuple[list[Tensor]]: It contains two elements, the first is the\n        keypoints for each projected 2D bbox in batch data. The second is\n        the visible mask of depth calculated by keypoints.\n    \"\"\"\n\n    assert len(gt_bboxes_3d_list) == len(centers2d_list)\n    bs = len(gt_bboxes_3d_list)\n    keypoints2d_list = []\n    keypoints_depth_mask_list = []\n\n    for i in range(bs):\n        gt_bboxes_3d = gt_bboxes_3d_list[i]\n        centers2d = centers2d_list[i]\n        img_shape = img_metas[i]['img_shape']\n        cam2img = img_metas[i]['cam2img']\n        h, w = img_shape[:2]\n        # (N, 8, 3)\n        corners3d = gt_bboxes_3d.corners\n        top_centers3d = torch.mean(corners3d[:, [0, 1, 4, 5], :], dim=1)\n        bot_centers3d = torch.mean(corners3d[:, [2, 3, 6, 7], :], dim=1)\n        # (N, 2, 3)\n        top_bot_centers3d = torch.stack((top_centers3d, bot_centers3d), dim=1)\n        keypoints3d = torch.cat((corners3d, top_bot_centers3d), dim=1)\n        # (N, 10, 2)\n        keypoints2d = points_cam2img(keypoints3d, cam2img)\n\n        # keypoints mask: keypoints must be inside\n        # the image and in front of the camera\n        keypoints_x_visible = (keypoints2d[..., 0] >= 0) & (\n            keypoints2d[..., 0] <= w - 1)\n        keypoints_y_visible = (keypoints2d[..., 1] >= 0) & (\n            keypoints2d[..., 1] <= h - 1)\n        keypoints_z_visible = (keypoints3d[..., -1] > 0)\n\n        # (N, 1O)\n        keypoints_visible = keypoints_x_visible & \\\n            keypoints_y_visible & keypoints_z_visible\n        # center, diag-02, diag-13\n        keypoints_depth_valid = torch.stack(\n            (keypoints_visible[:, [8, 9]].all(dim=1),\n             keypoints_visible[:, [0, 3, 5, 6]].all(dim=1),\n             keypoints_visible[:, [1, 2, 4, 7]].all(dim=1)),\n            dim=1)\n        keypoints_visible = keypoints_visible.float()\n\n        if use_local_coords:\n            keypoints2d = torch.cat((keypoints2d - centers2d.unsqueeze(1),\n                                     keypoints_visible.unsqueeze(-1)),\n                                    dim=2)\n        else:\n            keypoints2d = torch.cat(\n                (keypoints2d, keypoints_visible.unsqueeze(-1)), dim=2)\n\n        keypoints2d_list.append(keypoints2d)\n        keypoints_depth_mask_list.append(keypoints_depth_valid)\n\n    return (keypoints2d_list, keypoints_depth_mask_list)\n"
  },
  {
    "path": "mmdet3d/models/utils/handle_objs.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef filter_outside_objs(gt_bboxes_list, gt_labels_list, gt_bboxes_3d_list,\n                        gt_labels_3d_list, centers2d_list, img_metas):\n    \"\"\"Function to filter the objects label outside the image.\n\n    Args:\n        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n            each has shape (num_gt, 4).\n        gt_labels_list (list[Tensor]): Ground truth labels of each box,\n            each has shape (num_gt,).\n        gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each\n            image, each has shape (num_gt, bbox_code_size).\n        gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each\n            box, each has shape (num_gt,).\n        centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,\n            each has shape (num_gt, 2).\n        img_metas (list[dict]): Meta information of each image, e.g.,\n            image size, scaling factor, etc.\n    \"\"\"\n    bs = len(centers2d_list)\n\n    for i in range(bs):\n        centers2d = centers2d_list[i].clone()\n        img_shape = img_metas[i]['img_shape']\n        keep_inds = (centers2d[:, 0] > 0) & \\\n            (centers2d[:, 0] < img_shape[1]) & \\\n            (centers2d[:, 1] > 0) & \\\n            (centers2d[:, 1] < img_shape[0])\n        centers2d_list[i] = centers2d[keep_inds]\n        gt_labels_list[i] = gt_labels_list[i][keep_inds]\n        gt_bboxes_list[i] = gt_bboxes_list[i][keep_inds]\n        gt_bboxes_3d_list[i].tensor = gt_bboxes_3d_list[i].tensor[keep_inds]\n        gt_labels_3d_list[i] = gt_labels_3d_list[i][keep_inds]\n\n\ndef get_centers2d_target(centers2d, centers, img_shape):\n    \"\"\"Function to get target centers2d.\n\n    Args:\n        centers2d (Tensor): Projected 3D centers onto 2D images.\n        centers (Tensor): Centers of 2d gt bboxes.\n        img_shape (tuple): Resized image shape.\n\n    Returns:\n        torch.Tensor: Projected 3D centers (centers2D) target.\n    \"\"\"\n    N = centers2d.shape[0]\n    h, w = img_shape[:2]\n    valid_intersects = centers2d.new_zeros((N, 2))\n    a = (centers[:, 1] - centers2d[:, 1]) / (centers[:, 0] - centers2d[:, 0])\n    b = centers[:, 1] - a * centers[:, 0]\n    left_y = b\n    right_y = (w - 1) * a + b\n    top_x = -b / a\n    bottom_x = (h - 1 - b) / a\n\n    left_coors = torch.stack((left_y.new_zeros(N, ), left_y), dim=1)\n    right_coors = torch.stack((right_y.new_full((N, ), w - 1), right_y), dim=1)\n    top_coors = torch.stack((top_x, top_x.new_zeros(N, )), dim=1)\n    bottom_coors = torch.stack((bottom_x, bottom_x.new_full((N, ), h - 1)),\n                               dim=1)\n\n    intersects = torch.stack(\n        [left_coors, right_coors, top_coors, bottom_coors], dim=1)\n    intersects_x = intersects[:, :, 0]\n    intersects_y = intersects[:, :, 1]\n    inds = (intersects_x >= 0) & (intersects_x <=\n                                  w - 1) & (intersects_y >= 0) & (\n                                      intersects_y <= h - 1)\n    valid_intersects = intersects[inds].reshape(N, 2, 2)\n    dist = torch.norm(valid_intersects - centers2d.unsqueeze(1), dim=2)\n    min_idx = torch.argmin(dist, dim=1)\n\n    min_idx = min_idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 2)\n    centers2d_target = valid_intersects.gather(dim=1, index=min_idx).squeeze(1)\n\n    return centers2d_target\n\n\ndef handle_proj_objs(centers2d_list, gt_bboxes_list, img_metas):\n    \"\"\"Function to handle projected 
object centers2d, generate target\n    centers2d.\n\n    Args:\n        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n            shape (num_gt, 4).\n        centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,\n            shape (num_gt, 2).\n        img_metas (list[dict]): Meta information of each image, e.g.,\n            image size, scaling factor, etc.\n\n    Returns:\n        tuple[list[Tensor]]: It contains three elements. The first is the\n        target centers2d after handling the truncated objects. The second\n        is the offsets between the original centers2d and the rounded int\n        target centers2d, and the last is the truncation mask for each\n        object in batch data.\n    \"\"\"\n    bs = len(centers2d_list)\n    centers2d_target_list = []\n    trunc_mask_list = []\n    offsets2d_list = []\n    # for now, only the pad mode where the image is padded on the right\n    # and bottom sides is supported.\n    for i in range(bs):\n        centers2d = centers2d_list[i]\n        gt_bbox = gt_bboxes_list[i]\n        img_shape = img_metas[i]['img_shape']\n        centers2d_target = centers2d.clone()\n        inside_inds = (centers2d[:, 0] > 0) & \\\n            (centers2d[:, 0] < img_shape[1]) & \\\n            (centers2d[:, 1] > 0) & \\\n            (centers2d[:, 1] < img_shape[0])\n        outside_inds = ~inside_inds\n\n        # if there are outside objects\n        if outside_inds.any():\n            centers = (gt_bbox[:, :2] + gt_bbox[:, 2:]) / 2\n            outside_centers2d = centers2d[outside_inds]\n            match_centers = centers[outside_inds]\n            target_outside_centers2d = get_centers2d_target(\n                outside_centers2d, match_centers, img_shape)\n            centers2d_target[outside_inds] = target_outside_centers2d\n\n        offsets2d = centers2d - centers2d_target.round().int()\n        trunc_mask = outside_inds\n\n        centers2d_target_list.append(centers2d_target)\n        trunc_mask_list.append(trunc_mask)\n        offsets2d_list.append(offsets2d)\n\n    return (centers2d_target_list, offsets2d_list, trunc_mask_list)\n"
  },
  {
    "path": "mmdet3d/models/utils/mlp.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch import nn as nn\n\n\nclass MLP(BaseModule):\n    \"\"\"A simple MLP module.\n\n    Pass features (B, C, N) through an MLP.\n\n    Args:\n        in_channels (int, optional): Number of channels of input features.\n            Default: 18.\n        conv_channels (tuple[int], optional): Out channels of the convolution.\n            Default: (256, 256).\n        conv_cfg (dict, optional): Config of convolution.\n            Default: dict(type='Conv1d').\n        norm_cfg (dict, optional): Config of normalization.\n            Default: dict(type='BN1d').\n        act_cfg (dict, optional): Config of activation.\n            Default: dict(type='ReLU').\n    \"\"\"\n\n    def __init__(self,\n                 in_channel=18,\n                 conv_channels=(256, 256),\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.mlp = nn.Sequential()\n        prev_channels = in_channel\n        for i, conv_channel in enumerate(conv_channels):\n            self.mlp.add_module(\n                f'layer{i}',\n                ConvModule(\n                    prev_channels,\n                    conv_channels[i],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    bias=True,\n                    inplace=True))\n            prev_channels = conv_channels[i]\n\n    def forward(self, img_features):\n        return self.mlp(img_features)\n"
  },
  {
    "path": "mmdet3d/models/voxel_encoders/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .pillar_encoder import DynamicPillarFeatureNet, PillarFeatureNet\nfrom .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE\n\n__all__ = [\n    'PillarFeatureNet', 'DynamicPillarFeatureNet', 'HardVFE', 'DynamicVFE',\n    'HardSimpleVFE', 'DynamicSimpleVFE'\n]\n"
  },
  {
    "path": "mmdet3d/models/voxel_encoders/pillar_encoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import build_norm_layer\nfrom mmcv.ops import DynamicScatter\nfrom mmcv.runner import force_fp32\nfrom torch import nn\n\nfrom ..builder import VOXEL_ENCODERS\nfrom .utils import PFNLayer, get_paddings_indicator\n\n\n@VOXEL_ENCODERS.register_module()\nclass PillarFeatureNet(nn.Module):\n    \"\"\"Pillar Feature Net.\n\n    The network prepares the pillar features and performs forward pass\n    through PFNLayers.\n\n    Args:\n        in_channels (int, optional): Number of input features,\n            either x, y, z or x, y, z, r. Defaults to 4.\n        feat_channels (tuple, optional): Number of features in each of the\n            N PFNLayers. Defaults to (64, ).\n        with_distance (bool, optional): Whether to include Euclidean distance\n            to points. Defaults to False.\n        with_cluster_center (bool, optional): [description]. Defaults to True.\n        with_voxel_center (bool, optional): [description]. Defaults to True.\n        voxel_size (tuple[float], optional): Size of voxels, only utilize x\n            and y size. Defaults to (0.2, 0.2, 4).\n        point_cloud_range (tuple[float], optional): Point cloud range, only\n            utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg ([type], optional): [description].\n            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).\n        mode (str, optional): The mode to gather point features. Options are\n            'max' or 'avg'. Defaults to 'max'.\n        legacy (bool, optional): Whether to use the new behavior or\n            the original behavior. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=(64, ),\n                 with_distance=False,\n                 with_cluster_center=True,\n                 with_voxel_center=True,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 legacy=True):\n        super(PillarFeatureNet, self).__init__()\n        assert len(feat_channels) > 0\n        self.legacy = legacy\n        if with_cluster_center:\n            in_channels += 3\n        if with_voxel_center:\n            in_channels += 3\n        if with_distance:\n            in_channels += 1\n        self._with_distance = with_distance\n        self._with_cluster_center = with_cluster_center\n        self._with_voxel_center = with_voxel_center\n        self.fp16_enabled = False\n        # Create PillarFeatureNet layers\n        self.in_channels = in_channels\n        feat_channels = [in_channels] + list(feat_channels)\n        pfn_layers = []\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i < len(feat_channels) - 2:\n                last_layer = False\n            else:\n                last_layer = True\n            pfn_layers.append(\n                PFNLayer(\n                    in_filters,\n                    out_filters,\n                    norm_cfg=norm_cfg,\n                    last_layer=last_layer,\n                    mode=mode))\n        self.pfn_layers = nn.ModuleList(pfn_layers)\n\n        # Need pillar (voxel) size and x/y offset in order to calculate offset\n        self.vx = voxel_size[0]\n        self.vy = voxel_size[1]\n     
   self.vz = voxel_size[2]\n        self.x_offset = self.vx / 2 + point_cloud_range[0]\n        self.y_offset = self.vy / 2 + point_cloud_range[1]\n        self.z_offset = self.vz / 2 + point_cloud_range[2]\n        self.point_cloud_range = point_cloud_range\n\n    @force_fp32(out_fp16=True)\n    def forward(self, features, num_points, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features or raw points in shape\n                (N, M, C).\n            num_points (torch.Tensor): Number of points in each pillar.\n            coors (torch.Tensor): Coordinates of each voxel.\n\n        Returns:\n            torch.Tensor: Features of pillars.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            points_mean = features[:, :, :3].sum(\n                dim=1, keepdim=True) / num_points.type_as(features).view(\n                    -1, 1, 1)\n            f_cluster = features[:, :, :3] - points_mean\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        dtype = features.dtype\n        if self._with_voxel_center:\n            if not self.legacy:\n                f_center = torch.zeros_like(features[:, :, :3])\n                f_center[:, :, 0] = features[:, :, 0] - (\n                    coors[:, 3].to(dtype).unsqueeze(1) * self.vx +\n                    self.x_offset)\n                f_center[:, :, 1] = features[:, :, 1] - (\n                    coors[:, 2].to(dtype).unsqueeze(1) * self.vy +\n                    self.y_offset)\n                f_center[:, :, 2] = features[:, :, 2] - (\n                    coors[:, 1].to(dtype).unsqueeze(1) * self.vz +\n                    self.z_offset)\n            else:\n                f_center = features[:, :, :3]\n                f_center[:, :, 0] = f_center[:, :, 0] - (\n                    coors[:, 3].type_as(features).unsqueeze(1) * self.vx +\n                    self.x_offset)\n                f_center[:, :, 1] = f_center[:, :, 1] - (\n                    coors[:, 2].type_as(features).unsqueeze(1) * self.vy +\n                    self.y_offset)\n                f_center[:, :, 2] = f_center[:, :, 2] - (\n                    coors[:, 1].type_as(features).unsqueeze(1) * self.vz +\n                    self.z_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        features = torch.cat(features_ls, dim=-1)\n        # The feature decorations were calculated without regard to whether\n        # pillar was empty. Need to ensure that\n        # empty pillars remain set to zeros.\n        voxel_count = features.shape[1]\n        mask = get_paddings_indicator(num_points, voxel_count, axis=0)\n        mask = torch.unsqueeze(mask, -1).type_as(features)\n        features *= mask\n\n        for pfn in self.pfn_layers:\n            features = pfn(features, num_points)\n\n        return features.squeeze(1)\n\n\n@VOXEL_ENCODERS.register_module()\nclass DynamicPillarFeatureNet(PillarFeatureNet):\n    \"\"\"Pillar Feature Net using dynamic voxelization.\n\n    The network prepares the pillar features and performs forward pass\n    through PFNLayers. 
The main difference is that it is used for\n    dynamic voxels, which contains different number of points inside a voxel\n    without limits.\n\n    Args:\n        in_channels (int, optional): Number of input features,\n            either x, y, z or x, y, z, r. Defaults to 4.\n        feat_channels (tuple, optional): Number of features in each of the\n            N PFNLayers. Defaults to (64, ).\n        with_distance (bool, optional): Whether to include Euclidean distance\n            to points. Defaults to False.\n        with_cluster_center (bool, optional): [description]. Defaults to True.\n        with_voxel_center (bool, optional): [description]. Defaults to True.\n        voxel_size (tuple[float], optional): Size of voxels, only utilize x\n            and y size. Defaults to (0.2, 0.2, 4).\n        point_cloud_range (tuple[float], optional): Point cloud range, only\n            utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg ([type], optional): [description].\n            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).\n        mode (str, optional): The mode to gather point features. Options are\n            'max' or 'avg'. Defaults to 'max'.\n        legacy (bool, optional): Whether to use the new behavior or\n            the original behavior. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=(64, ),\n                 with_distance=False,\n                 with_cluster_center=True,\n                 with_voxel_center=True,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 legacy=True):\n        super(DynamicPillarFeatureNet, self).__init__(\n            in_channels,\n            feat_channels,\n            with_distance,\n            with_cluster_center=with_cluster_center,\n            with_voxel_center=with_voxel_center,\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            norm_cfg=norm_cfg,\n            mode=mode,\n            legacy=legacy)\n        self.fp16_enabled = False\n        feat_channels = [self.in_channels] + list(feat_channels)\n        pfn_layers = []\n        # TODO: currently only support one PFNLayer\n\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i > 0:\n                in_filters *= 2\n            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)\n            pfn_layers.append(\n                nn.Sequential(\n                    nn.Linear(in_filters, out_filters, bias=False), norm_layer,\n                    nn.ReLU(inplace=True)))\n        self.num_pfn = len(pfn_layers)\n        self.pfn_layers = nn.ModuleList(pfn_layers)\n        self.pfn_scatter = DynamicScatter(voxel_size, point_cloud_range,\n                                          (mode != 'max'))\n        self.cluster_scatter = DynamicScatter(\n            voxel_size, point_cloud_range, average_points=True)\n\n    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):\n        \"\"\"Map the centers of voxels to its corresponding points.\n\n        Args:\n            pts_coors (torch.Tensor): The coordinates of each points, shape\n                (M, 3), where M is the number of points.\n            voxel_mean (torch.Tensor): The mean 
or aggregated features of a\n                voxel, shape (N, C), where N is the number of voxels.\n            voxel_coors (torch.Tensor): The coordinates of each voxel.\n\n        Returns:\n            torch.Tensor: Corresponding voxel centers of each points, shape\n                (M, C), where M is the number of points.\n        \"\"\"\n        # Step 1: scatter voxel into canvas\n        # Calculate necessary things for canvas creation\n        canvas_y = int(\n            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)\n        canvas_x = int(\n            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)\n        canvas_channel = voxel_mean.size(1)\n        batch_size = pts_coors[-1, 0] + 1\n        canvas_len = canvas_y * canvas_x * batch_size\n        # Create the canvas for this sample\n        canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)\n        # Only include non-empty pillars\n        indices = (\n            voxel_coors[:, 0] * canvas_y * canvas_x +\n            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])\n        # Scatter the blob back to the canvas\n        canvas[:, indices.long()] = voxel_mean.t()\n\n        # Step 2: get voxel mean for each point\n        voxel_index = (\n            pts_coors[:, 0] * canvas_y * canvas_x +\n            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])\n        center_per_point = canvas[:, voxel_index.long()].t()\n        return center_per_point\n\n    @force_fp32(out_fp16=True)\n    def forward(self, features, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features or raw points in shape\n                (N, M, C).\n            coors (torch.Tensor): Coordinates of each voxel\n\n        Returns:\n            torch.Tensor: Features of pillars.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            voxel_mean, mean_coors = self.cluster_scatter(features, coors)\n            points_mean = self.map_voxel_center_to_point(\n                coors, voxel_mean, mean_coors)\n            # TODO: maybe also do cluster for reflectivity\n            f_cluster = features[:, :3] - points_mean[:, :3]\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        if self._with_voxel_center:\n            f_center = features.new_zeros(size=(features.size(0), 3))\n            f_center[:, 0] = features[:, 0] - (\n                coors[:, 3].type_as(features) * self.vx + self.x_offset)\n            f_center[:, 1] = features[:, 1] - (\n                coors[:, 2].type_as(features) * self.vy + self.y_offset)\n            f_center[:, 2] = features[:, 2] - (\n                coors[:, 1].type_as(features) * self.vz + self.z_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        features = torch.cat(features_ls, dim=-1)\n        for i, pfn in enumerate(self.pfn_layers):\n            point_feats = pfn(features)\n            voxel_feats, voxel_coors = self.pfn_scatter(point_feats, coors)\n            if i != len(self.pfn_layers) - 1:\n                # need to concat voxel feats if it is not the last pfn\n                feat_per_point = self.map_voxel_center_to_point(\n                    coors, 
voxel_feats, voxel_coors)\n                features = torch.cat([point_feats, feat_per_point], dim=1)\n\n        return voxel_feats, voxel_coors\n"
  },
  {
    "path": "mmdet3d/models/voxel_encoders/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import build_norm_layer\nfrom mmcv.runner import auto_fp16\nfrom torch import nn\nfrom torch.nn import functional as F\n\n\ndef get_paddings_indicator(actual_num, max_num, axis=0):\n    \"\"\"Create boolean mask by actually number of a padded tensor.\n\n    Args:\n        actual_num (torch.Tensor): Actual number of points in each voxel.\n        max_num (int): Max number of points in each voxel\n\n    Returns:\n        torch.Tensor: Mask indicates which points are valid inside a voxel.\n    \"\"\"\n    actual_num = torch.unsqueeze(actual_num, axis + 1)\n    # tiled_actual_num: [N, M, 1]\n    max_num_shape = [1] * len(actual_num.shape)\n    max_num_shape[axis + 1] = -1\n    max_num = torch.arange(\n        max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)\n    # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]\n    # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]\n    paddings_indicator = actual_num.int() > max_num\n    # paddings_indicator shape: [batch_size, max_num]\n    return paddings_indicator\n\n\nclass VFELayer(nn.Module):\n    \"\"\"Voxel Feature Encoder layer.\n\n    The voxel encoder is composed of a series of these layers.\n    This module do not support average pooling and only support to use\n    max pooling to gather features inside a VFE.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        norm_cfg (dict): Config dict of normalization layers\n        max_out (bool): Whether aggregate the features of points inside\n            each voxel and only return voxel features.\n        cat_max (bool): Whether concatenate the aggregated features\n            and pointwise features.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 max_out=True,\n                 cat_max=True):\n        super(VFELayer, self).__init__()\n        self.fp16_enabled = False\n        self.cat_max = cat_max\n        self.max_out = max_out\n        # self.units = int(out_channels / 2)\n\n        self.norm = build_norm_layer(norm_cfg, out_channels)[1]\n        self.linear = nn.Linear(in_channels, out_channels, bias=False)\n\n    @auto_fp16(apply_to=('inputs'), out_fp32=True)\n    def forward(self, inputs):\n        \"\"\"Forward function.\n\n        Args:\n            inputs (torch.Tensor): Voxels features of shape (N, M, C).\n                N is the number of voxels, M is the number of points in\n                voxels, C is the number of channels of point features.\n\n        Returns:\n            torch.Tensor: Voxel features. 
There are three mode under which the\n                features have different meaning.\n                - `max_out=False`: Return point-wise features in\n                    shape (N, M, C).\n                - `max_out=True` and `cat_max=False`: Return aggregated\n                    voxel features in shape (N, C)\n                - `max_out=True` and `cat_max=True`: Return concatenated\n                    point-wise features in shape (N, M, C).\n        \"\"\"\n        # [K, T, 7] tensordot [7, units] = [K, T, units]\n        voxel_count = inputs.shape[1]\n\n        x = self.linear(inputs)\n        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,\n                                                               1).contiguous()\n        pointwise = F.relu(x)\n        # [K, T, units]\n        if self.max_out:\n            aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]\n        else:\n            # this is for fusion layer\n            return pointwise\n\n        if not self.cat_max:\n            return aggregated.squeeze(1)\n        else:\n            # [K, 1, units]\n            repeated = aggregated.repeat(1, voxel_count, 1)\n            concatenated = torch.cat([pointwise, repeated], dim=2)\n            # [K, T, 2 * units]\n            return concatenated\n\n\nclass PFNLayer(nn.Module):\n    \"\"\"Pillar Feature Net Layer.\n\n    The Pillar Feature Net is composed of a series of these layers, but the\n    PointPillars paper results only used a single PFNLayer.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        norm_cfg (dict, optional): Config dict of normalization layers.\n            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).\n        last_layer (bool, optional): If last_layer, there is no\n            concatenation of features. Defaults to False.\n        mode (str, optional): Pooling model to gather features inside voxels.\n            Defaults to 'max'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 last_layer=False,\n                 mode='max'):\n\n        super().__init__()\n        self.fp16_enabled = False\n        self.name = 'PFNLayer'\n        self.last_vfe = last_layer\n        if not self.last_vfe:\n            out_channels = out_channels // 2\n        self.units = out_channels\n\n        self.norm = build_norm_layer(norm_cfg, self.units)[1]\n        self.linear = nn.Linear(in_channels, self.units, bias=False)\n\n        assert mode in ['max', 'avg']\n        self.mode = mode\n\n    @auto_fp16(apply_to=('inputs'), out_fp32=True)\n    def forward(self, inputs, num_voxels=None, aligned_distance=None):\n        \"\"\"Forward function.\n\n        Args:\n            inputs (torch.Tensor): Pillar/Voxel inputs with shape (N, M, C).\n                N is the number of voxels, M is the number of points in\n                voxels, C is the number of channels of point features.\n            num_voxels (torch.Tensor, optional): Number of points in each\n                voxel. Defaults to None.\n            aligned_distance (torch.Tensor, optional): The distance of\n                each points to the voxel center. 
Defaults to None.\n\n        Returns:\n            torch.Tensor: Features of Pillars.\n        \"\"\"\n        x = self.linear(inputs)\n        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,\n                                                               1).contiguous()\n        x = F.relu(x)\n\n        if self.mode == 'max':\n            if aligned_distance is not None:\n                x = x.mul(aligned_distance.unsqueeze(-1))\n            x_max = torch.max(x, dim=1, keepdim=True)[0]\n        elif self.mode == 'avg':\n            if aligned_distance is not None:\n                x = x.mul(aligned_distance.unsqueeze(-1))\n            x_max = x.sum(\n                dim=1, keepdim=True) / num_voxels.type_as(inputs).view(\n                    -1, 1, 1)\n\n        if self.last_vfe:\n            return x_max\n        else:\n            x_repeat = x_max.repeat(1, inputs.shape[1], 1)\n            x_concatenated = torch.cat([x, x_repeat], dim=2)\n            return x_concatenated\n"
  },
  {
    "path": "mmdet3d/models/voxel_encoders/voxel_encoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import build_norm_layer\nfrom mmcv.ops import DynamicScatter\nfrom mmcv.runner import force_fp32\nfrom torch import nn\n\nfrom .. import builder\nfrom ..builder import VOXEL_ENCODERS\nfrom .utils import VFELayer, get_paddings_indicator\n\n\n@VOXEL_ENCODERS.register_module()\nclass HardSimpleVFE(nn.Module):\n    \"\"\"Simple voxel feature encoder used in SECOND.\n\n    It simply averages the values of points in a voxel.\n\n    Args:\n        num_features (int, optional): Number of features to use. Default: 4.\n    \"\"\"\n\n    def __init__(self, num_features=4):\n        super(HardSimpleVFE, self).__init__()\n        self.num_features = num_features\n        self.fp16_enabled = False\n\n    @force_fp32(out_fp16=True)\n    def forward(self, features, num_points, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features in shape\n                (N, M, 3(4)). N is the number of voxels and M is the maximum\n                number of points inside a single voxel.\n            num_points (torch.Tensor): Number of points in each voxel,\n                 shape (N, ).\n            coors (torch.Tensor): Coordinates of voxels.\n\n        Returns:\n            torch.Tensor: Mean of points inside each voxel in shape (N, 3(4))\n        \"\"\"\n        points_mean = features[:, :, :self.num_features].sum(\n            dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)\n        return points_mean.contiguous()\n\n\n@VOXEL_ENCODERS.register_module()\nclass DynamicSimpleVFE(nn.Module):\n    \"\"\"Simple dynamic voxel feature encoder used in DV-SECOND.\n\n    It simply averages the values of points in a voxel.\n    But the number of points in a voxel is dynamic and varies.\n\n    Args:\n        voxel_size (tupe[float]): Size of a single voxel\n        point_cloud_range (tuple[float]): Range of the point cloud and voxels\n    \"\"\"\n\n    def __init__(self,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1)):\n        super(DynamicSimpleVFE, self).__init__()\n        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)\n        self.fp16_enabled = False\n\n    @torch.no_grad()\n    @force_fp32(out_fp16=True)\n    def forward(self, features, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features in shape\n                (N, 3(4)). N is the number of points.\n            coors (torch.Tensor): Coordinates of voxels.\n\n        Returns:\n            torch.Tensor: Mean of points inside each voxel in shape (M, 3(4)).\n                M is the number of voxels.\n        \"\"\"\n        # This function is used from the start of the voxelnet\n        # num_points: [concated_num_points]\n        features, features_coors = self.scatter(features, coors)\n        return features, features_coors\n\n\n@VOXEL_ENCODERS.register_module()\nclass DynamicVFE(nn.Module):\n    \"\"\"Dynamic Voxel feature encoder used in DV-SECOND.\n\n    It encodes features of voxels and their points. It could also fuse\n    image feature into voxel features in a point-wise manner.\n    The number of points inside the voxel varies.\n\n    Args:\n        in_channels (int, optional): Input channels of VFE. 
Defaults to 4.\n        feat_channels (list(int), optional): Channels of features in VFE.\n        with_distance (bool, optional): Whether to use the L2 distance of\n            points to the origin point. Defaults to False.\n        with_cluster_center (bool, optional): Whether to use the distance\n            to cluster center of points inside a voxel. Defaults to False.\n        with_voxel_center (bool, optional): Whether to use the distance\n            to center of voxel for each points inside a voxel.\n            Defaults to False.\n        voxel_size (tuple[float], optional): Size of a single voxel.\n            Defaults to (0.2, 0.2, 4).\n        point_cloud_range (tuple[float], optional): The range of points\n            or voxels. Defaults to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg (dict, optional): Config dict of normalization layers.\n        mode (str, optional): The mode when pooling features of points\n            inside a voxel. Available options include 'max' and 'avg'.\n            Defaults to 'max'.\n        fusion_layer (dict, optional): The config dict of fusion\n            layer used in multi-modal detectors. Defaults to None.\n        return_point_feats (bool, optional): Whether to return the features\n            of each points. Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=[],\n                 with_distance=False,\n                 with_cluster_center=False,\n                 with_voxel_center=False,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 fusion_layer=None,\n                 return_point_feats=False):\n        super(DynamicVFE, self).__init__()\n        assert mode in ['avg', 'max']\n        assert len(feat_channels) > 0\n        if with_cluster_center:\n            in_channels += 3\n        if with_voxel_center:\n            in_channels += 3\n        if with_distance:\n            in_channels += 1\n        self.in_channels = in_channels\n        self._with_distance = with_distance\n        self._with_cluster_center = with_cluster_center\n        self._with_voxel_center = with_voxel_center\n        self.return_point_feats = return_point_feats\n        self.fp16_enabled = False\n\n        # Need pillar (voxel) size and x/y offset in order to calculate offset\n        self.vx = voxel_size[0]\n        self.vy = voxel_size[1]\n        self.vz = voxel_size[2]\n        self.x_offset = self.vx / 2 + point_cloud_range[0]\n        self.y_offset = self.vy / 2 + point_cloud_range[1]\n        self.z_offset = self.vz / 2 + point_cloud_range[2]\n        self.point_cloud_range = point_cloud_range\n        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)\n\n        feat_channels = [self.in_channels] + list(feat_channels)\n        vfe_layers = []\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i > 0:\n                in_filters *= 2\n            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)\n            vfe_layers.append(\n                nn.Sequential(\n                    nn.Linear(in_filters, out_filters, bias=False), norm_layer,\n                    nn.ReLU(inplace=True)))\n        self.vfe_layers = nn.ModuleList(vfe_layers)\n        self.num_vfe = len(vfe_layers)\n      
  self.vfe_scatter = DynamicScatter(voxel_size, point_cloud_range,\n                                          (mode != 'max'))\n        self.cluster_scatter = DynamicScatter(\n            voxel_size, point_cloud_range, average_points=True)\n        self.fusion_layer = None\n        if fusion_layer is not None:\n            self.fusion_layer = builder.build_fusion_layer(fusion_layer)\n\n    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):\n        \"\"\"Map voxel features to its corresponding points.\n\n        Args:\n            pts_coors (torch.Tensor): Voxel coordinate of each point.\n            voxel_mean (torch.Tensor): Voxel features to be mapped.\n            voxel_coors (torch.Tensor): Coordinates of valid voxels\n\n        Returns:\n            torch.Tensor: Features or centers of each point.\n        \"\"\"\n        # Step 1: scatter voxel into canvas\n        # Calculate necessary things for canvas creation\n        canvas_z = int(\n            (self.point_cloud_range[5] - self.point_cloud_range[2]) / self.vz)\n        canvas_y = int(\n            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)\n        canvas_x = int(\n            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)\n        # canvas_channel = voxel_mean.size(1)\n        batch_size = pts_coors[-1, 0] + 1\n        canvas_len = canvas_z * canvas_y * canvas_x * batch_size\n        # Create the canvas for this sample\n        canvas = voxel_mean.new_zeros(canvas_len, dtype=torch.long)\n        # Only include non-empty pillars\n        indices = (\n            voxel_coors[:, 0] * canvas_z * canvas_y * canvas_x +\n            voxel_coors[:, 1] * canvas_y * canvas_x +\n            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])\n        # Scatter the blob back to the canvas\n        canvas[indices.long()] = torch.arange(\n            start=0, end=voxel_mean.size(0), device=voxel_mean.device)\n\n        # Step 2: get voxel mean for each point\n        voxel_index = (\n            pts_coors[:, 0] * canvas_z * canvas_y * canvas_x +\n            pts_coors[:, 1] * canvas_y * canvas_x +\n            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])\n        voxel_inds = canvas[voxel_index.long()]\n        center_per_point = voxel_mean[voxel_inds, ...]\n        return center_per_point\n\n    @force_fp32(out_fp16=True)\n    def forward(self,\n                features,\n                coors,\n                points=None,\n                img_feats=None,\n                img_metas=None):\n        \"\"\"Forward functions.\n\n        Args:\n            features (torch.Tensor): Features of voxels, shape is NxC.\n            coors (torch.Tensor): Coordinates of voxels, shape is  Nx(1+NDim).\n            points (list[torch.Tensor], optional): Raw points used to guide the\n                multi-modality fusion. Defaults to None.\n            img_feats (list[torch.Tensor], optional): Image features used for\n                multi-modality fusion. Defaults to None.\n            img_metas (dict, optional): [description]. Defaults to None.\n\n        Returns:\n            tuple: If `return_point_feats` is False, returns voxel features and\n                its coordinates. 
If `return_point_feats` is True, returns\n                feature of each points inside voxels.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            voxel_mean, mean_coors = self.cluster_scatter(features, coors)\n            points_mean = self.map_voxel_center_to_point(\n                coors, voxel_mean, mean_coors)\n            # TODO: maybe also do cluster for reflectivity\n            f_cluster = features[:, :3] - points_mean[:, :3]\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        if self._with_voxel_center:\n            f_center = features.new_zeros(size=(features.size(0), 3))\n            f_center[:, 0] = features[:, 0] - (\n                coors[:, 3].type_as(features) * self.vx + self.x_offset)\n            f_center[:, 1] = features[:, 1] - (\n                coors[:, 2].type_as(features) * self.vy + self.y_offset)\n            f_center[:, 2] = features[:, 2] - (\n                coors[:, 1].type_as(features) * self.vz + self.z_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        features = torch.cat(features_ls, dim=-1)\n        for i, vfe in enumerate(self.vfe_layers):\n            point_feats = vfe(features)\n            if (i == len(self.vfe_layers) - 1 and self.fusion_layer is not None\n                    and img_feats is not None):\n                point_feats = self.fusion_layer(img_feats, points, point_feats,\n                                                img_metas)\n            voxel_feats, voxel_coors = self.vfe_scatter(point_feats, coors)\n            if i != len(self.vfe_layers) - 1:\n                # need to concat voxel feats if it is not the last vfe\n                feat_per_point = self.map_voxel_center_to_point(\n                    coors, voxel_feats, voxel_coors)\n                features = torch.cat([point_feats, feat_per_point], dim=1)\n\n        if self.return_point_feats:\n            return point_feats\n        return voxel_feats, voxel_coors\n\n\n@VOXEL_ENCODERS.register_module()\nclass HardVFE(nn.Module):\n    \"\"\"Voxel feature encoder used in DV-SECOND.\n\n    It encodes features of voxels and their points. It could also fuse\n    image feature into voxel features in a point-wise manner.\n\n    Args:\n        in_channels (int, optional): Input channels of VFE. Defaults to 4.\n        feat_channels (list(int), optional): Channels of features in VFE.\n        with_distance (bool, optional): Whether to use the L2 distance\n            of points to the origin point. Defaults to False.\n        with_cluster_center (bool, optional): Whether to use the distance\n            to cluster center of points inside a voxel. Defaults to False.\n        with_voxel_center (bool, optional): Whether to use the distance to\n            center of voxel for each points inside a voxel. Defaults to False.\n        voxel_size (tuple[float], optional): Size of a single voxel.\n            Defaults to (0.2, 0.2, 4).\n        point_cloud_range (tuple[float], optional): The range of points\n            or voxels. 
Defaults to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg (dict, optional): Config dict of normalization layers.\n        mode (str, optional): The mode when pooling features of points inside a\n            voxel. Available options include 'max' and 'avg'.\n            Defaults to 'max'.\n        fusion_layer (dict, optional): The config dict of fusion layer\n            used in multi-modal detectors. Defaults to None.\n        return_point_feats (bool, optional): Whether to return the\n            features of each points. Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=[],\n                 with_distance=False,\n                 with_cluster_center=False,\n                 with_voxel_center=False,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 fusion_layer=None,\n                 return_point_feats=False):\n        super(HardVFE, self).__init__()\n        assert len(feat_channels) > 0\n        if with_cluster_center:\n            in_channels += 3\n        if with_voxel_center:\n            in_channels += 3\n        if with_distance:\n            in_channels += 1\n        self.in_channels = in_channels\n        self._with_distance = with_distance\n        self._with_cluster_center = with_cluster_center\n        self._with_voxel_center = with_voxel_center\n        self.return_point_feats = return_point_feats\n        self.fp16_enabled = False\n\n        # Need pillar (voxel) size and x/y offset to calculate pillar offset\n        self.vx = voxel_size[0]\n        self.vy = voxel_size[1]\n        self.vz = voxel_size[2]\n        self.x_offset = self.vx / 2 + point_cloud_range[0]\n        self.y_offset = self.vy / 2 + point_cloud_range[1]\n        self.z_offset = self.vz / 2 + point_cloud_range[2]\n        self.point_cloud_range = point_cloud_range\n        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)\n\n        feat_channels = [self.in_channels] + list(feat_channels)\n        vfe_layers = []\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i > 0:\n                in_filters *= 2\n            # TODO: pass norm_cfg to VFE\n            # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)\n            if i == (len(feat_channels) - 2):\n                cat_max = False\n                max_out = True\n                if fusion_layer:\n                    max_out = False\n            else:\n                max_out = True\n                cat_max = True\n            vfe_layers.append(\n                VFELayer(\n                    in_filters,\n                    out_filters,\n                    norm_cfg=norm_cfg,\n                    max_out=max_out,\n                    cat_max=cat_max))\n            self.vfe_layers = nn.ModuleList(vfe_layers)\n        self.num_vfe = len(vfe_layers)\n\n        self.fusion_layer = None\n        if fusion_layer is not None:\n            self.fusion_layer = builder.build_fusion_layer(fusion_layer)\n\n    @force_fp32(out_fp16=True)\n    def forward(self,\n                features,\n                num_points,\n                coors,\n                img_feats=None,\n                img_metas=None):\n        \"\"\"Forward functions.\n\n        Args:\n            features 
(torch.Tensor): Features of voxels, shape is MxNxC.\n            num_points (torch.Tensor): Number of points in each voxel.\n            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).\n            img_feats (list[torch.Tensor], optional): Image features used for\n                multi-modality fusion. Defaults to None.\n            img_metas (dict, optional): [description]. Defaults to None.\n\n        Returns:\n            tuple: If `return_point_feats` is False, returns voxel features and\n                its coordinates. If `return_point_feats` is True, returns\n                feature of each points inside voxels.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            points_mean = (\n                features[:, :, :3].sum(dim=1, keepdim=True) /\n                num_points.type_as(features).view(-1, 1, 1))\n            # TODO: maybe also do cluster for reflectivity\n            f_cluster = features[:, :, :3] - points_mean\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        if self._with_voxel_center:\n            f_center = features.new_zeros(\n                size=(features.size(0), features.size(1), 3))\n            f_center[:, :, 0] = features[:, :, 0] - (\n                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +\n                self.x_offset)\n            f_center[:, :, 1] = features[:, :, 1] - (\n                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +\n                self.y_offset)\n            f_center[:, :, 2] = features[:, :, 2] - (\n                coors[:, 1].type_as(features).unsqueeze(1) * self.vz +\n                self.z_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        voxel_feats = torch.cat(features_ls, dim=-1)\n        # The feature decorations were calculated without regard to whether\n        # pillar was empty.\n        # Need to ensure that empty voxels remain set to zeros.\n        voxel_count = voxel_feats.shape[1]\n        mask = get_paddings_indicator(num_points, voxel_count, axis=0)\n        voxel_feats *= mask.unsqueeze(-1).type_as(voxel_feats)\n\n        for i, vfe in enumerate(self.vfe_layers):\n            voxel_feats = vfe(voxel_feats)\n\n        if (self.fusion_layer is not None and img_feats is not None):\n            voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,\n                                                coors, img_feats, img_metas)\n\n        return voxel_feats\n\n    def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,\n                         img_metas):\n        \"\"\"Fuse image and point features with mask.\n\n        Args:\n            features (torch.Tensor): Features of voxel, usually it is the\n                values of points in voxels.\n            mask (torch.Tensor): Mask indicates valid features in each voxel.\n            voxel_feats (torch.Tensor): Features of voxels.\n            coors (torch.Tensor): Coordinates of each single voxel.\n            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.\n            img_metas (list(dict)): Meta information of image and points.\n\n        Returns:\n            torch.Tensor: Fused features of each 
voxel.\n        \"\"\"\n        # the features consist of a batch of points\n        batch_size = coors[-1, 0] + 1\n        points = []\n        for i in range(batch_size):\n            single_mask = (coors[:, 0] == i)\n            points.append(features[single_mask][mask[single_mask]])\n\n        point_feats = voxel_feats[mask]\n        point_feats = self.fusion_layer(img_feats, points, point_feats,\n                                        img_metas)\n\n        voxel_canvas = voxel_feats.new_zeros(\n            size=(voxel_feats.size(0), voxel_feats.size(1),\n                  point_feats.size(-1)))\n        voxel_canvas[mask] = point_feats\n        out = torch.max(voxel_canvas, dim=1)[0]\n\n        return out\n"
  },
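The `_with_voxel_center` decoration in the voxel encoders above subtracts each point's coordinates from the centre of the pillar it falls into, using the offsets precomputed in `__init__`. A minimal worked sketch of that arithmetic, using the default `voxel_size` and `point_cloud_range` from the class signature; the point coordinate and pillar index below are made-up values for illustration only:

```python
# Pillar-centre offset along x, mirroring f_center[:, :, 0] in HardVFE.forward.
# voxel_size=(0.2, 0.2, 4) and point_cloud_range=(0, -40, -3, 70.4, 40, 1) are the defaults above.
vx, x_min = 0.2, 0.0
x_offset = vx / 2 + x_min              # 0.1, as self.x_offset in __init__
point_x, pillar_ix = 1.03, 5           # a point's x-coordinate and its pillar index (coors[:, 3])
f_center_x = point_x - (pillar_ix * vx + x_offset)
print(round(f_center_x, 3))            # -0.07: signed distance of the point from its pillar centre
```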
  {
    "path": "mmdet3d/ops/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,\n                      get_compiling_cuda_version, nms, roi_align,\n                      sigmoid_focal_loss)\nfrom mmcv.ops.assign_score_withk import assign_score_withk\nfrom mmcv.ops.ball_query import ball_query\nfrom mmcv.ops.furthest_point_sample import (furthest_point_sample,\n                                            furthest_point_sample_with_dist)\nfrom mmcv.ops.gather_points import gather_points\nfrom mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation\nfrom mmcv.ops.knn import knn\nfrom mmcv.ops.points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu,\n                                      points_in_boxes_part)\nfrom mmcv.ops.points_sampler import PointsSampler as Points_Sampler\nfrom mmcv.ops.roiaware_pool3d import RoIAwarePool3d\nfrom mmcv.ops.roipoint_pool3d import RoIPointPool3d\nfrom mmcv.ops.scatter_points import DynamicScatter, dynamic_scatter\nfrom mmcv.ops.three_interpolate import three_interpolate\nfrom mmcv.ops.three_nn import three_nn\nfrom mmcv.ops.voxelize import Voxelization, voxelization\n\nfrom .dgcnn_modules import DGCNNFAModule, DGCNNFPModule, DGCNNGFModule\nfrom .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d\nfrom .paconv import PAConv, PAConvCUDA\nfrom .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,\n                               PAConvSAModule, PAConvSAModuleMSG,\n                               PointFPModule, PointSAModule, PointSAModuleMSG,\n                               build_sa_module)\nfrom .sparse_block import (SparseBasicBlock, SparseBottleneck,\n                           make_sparse_convmodule)\n\n__all__ = [\n    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',\n    'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',\n    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',\n    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',\n    'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',\n    'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu',\n    'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample',\n    'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn',\n    'gather_points', 'grouping_operation', 'GroupAll', 'QueryAndGroup',\n    'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', 'DGCNNFPModule',\n    'DGCNNGFModule', 'DGCNNFAModule', 'points_in_boxes_all',\n    'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version',\n    'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA',\n    'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule',\n    'PAConvCUDASAModuleMSG', 'RoIPointPool3d'\n]\n"
  },
  {
    "path": "mmdet3d/ops/bev_pool_v2/__init__.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\n"
  },
  {
    "path": "mmdet3d/ops/bev_pool_v2/bev_pool.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\n\nimport numpy as np\nimport torch\n\nfrom . import bev_pool_v2_ext\n\n__all__ = ['bev_pool_v2', 'TRTBEVPoolv2']\n\n\nclass QuickCumsumCuda(torch.autograd.Function):\n\n    @staticmethod\n    def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                bev_feat_shape, interval_starts, interval_lengths):\n        ranks_bev = ranks_bev.int()\n        depth = depth.contiguous().float()\n        feat = feat.contiguous().float()\n        ranks_depth = ranks_depth.contiguous().int()\n        ranks_feat = ranks_feat.contiguous().int()\n        interval_lengths = interval_lengths.contiguous().int()\n        interval_starts = interval_starts.contiguous().int()\n\n        out = feat.new_zeros(bev_feat_shape)\n\n        bev_pool_v2_ext.bev_pool_v2_forward(\n            depth,\n            feat,\n            out,\n            ranks_depth,\n            ranks_feat,\n            ranks_bev,\n            interval_lengths,\n            interval_starts,\n        )\n\n        ctx.save_for_backward(ranks_bev, depth, feat, ranks_feat, ranks_depth)\n        return out\n\n    @staticmethod\n    def backward(ctx, out_grad):\n        ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors\n\n        order = ranks_feat.argsort()\n        ranks_feat, ranks_depth, ranks_bev = \\\n            ranks_feat[order], ranks_depth[order], ranks_bev[order]\n        kept = torch.ones(\n            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)\n        kept[1:] = ranks_feat[1:] != ranks_feat[:-1]\n        interval_starts_bp = torch.where(kept)[0].int()\n        interval_lengths_bp = torch.zeros_like(interval_starts_bp)\n        interval_lengths_bp[:-1] = interval_starts_bp[\n            1:] - interval_starts_bp[:-1]\n        interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1]\n\n        depth = depth.contiguous()\n        feat = feat.contiguous()\n        ranks_depth = ranks_depth.contiguous()\n        ranks_feat = ranks_feat.contiguous()\n        ranks_bev = ranks_bev.contiguous()\n        interval_lengths_bp = interval_lengths_bp.contiguous()\n        interval_starts_bp = interval_starts_bp.contiguous()\n\n        depth_grad = depth.new_zeros(depth.shape)\n        feat_grad = feat.new_zeros(feat.shape)\n        out_grad = out_grad.contiguous()\n        bev_pool_v2_ext.bev_pool_v2_backward(\n            out_grad,\n            depth_grad,\n            feat_grad,\n            depth,\n            feat,\n            ranks_depth,\n            ranks_feat,\n            ranks_bev,\n            interval_lengths_bp,\n            interval_starts_bp,\n        )\n        return depth_grad, feat_grad, None, None, None, None, None, \\\n            None, None, None\n\n\ndef bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                bev_feat_shape, interval_starts, interval_lengths):\n    x = QuickCumsumCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                              bev_feat_shape, interval_starts,\n                              interval_lengths)\n    x = x.permute(0, 4, 1, 2, 3).contiguous()\n    return x\n\n\nclass TRTBEVPoolv2(torch.autograd.Function):\n\n    @staticmethod\n    def symbolic(g,\n                 depth,\n                 feat,\n                 ranks_depth,\n                 ranks_feat,\n                 ranks_bev,\n                 interval_starts,\n                 interval_lengths,\n                 out_height=128,\n                 
out_width=128):\n        \"\"\"symbolic function for creating onnx op.\"\"\"\n        return g.op(\n            'mmdeploy::bev_pool_v2',\n            depth,\n            feat,\n            ranks_depth,\n            ranks_feat,\n            ranks_bev,\n            interval_starts,\n            interval_lengths,\n            out_height_i=out_height,\n            out_width_i=out_width)\n\n    @staticmethod\n    def forward(g,\n                depth,\n                feat,\n                ranks_depth,\n                ranks_feat,\n                ranks_bev,\n                interval_starts,\n                interval_lengths,\n                out_height=128,\n                out_width=128):\n        \"\"\"run forward.\"\"\"\n        n, d, h, w = depth.shape\n        feat = feat.view(1, n, feat.shape[3], h, w)\n        feat = feat.permute(0, 1, 3, 4, 2)\n        depth = depth.view(1, n, d, h, w)\n        bev_feat_shape = (depth.shape[0], 1, out_height, out_width,\n                          feat.shape[-1])  # (B, Z, Y, X, C)\n        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                               bev_feat_shape, interval_starts,\n                               interval_lengths)\n        bev_feat = bev_feat.squeeze(2)\n        bev_feat = bev_feat.permute(0, 2, 3, 1)\n        return bev_feat\n\n\ndef test_bev_pool_v2():\n    depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])\n    depth = torch.from_numpy(depth).float().cuda()\n    depth = depth.view(1, 1, 2, 2, 2).requires_grad_()\n    feat = torch.ones(\n        size=[1, 1, 2, 2, 2], dtype=torch.float,\n        device='cuda').requires_grad_()\n    ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()\n    ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()\n    ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()\n\n    kept = torch.ones(\n        ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)\n    kept[1:] = ranks_bev[1:] != ranks_bev[:-1]\n    interval_starts = torch.where(kept)[0].int()\n    if len(interval_starts) == 0:\n        return None, None, None, None, None\n    interval_lengths = torch.zeros_like(interval_starts)\n    interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]\n    interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]\n    bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,\n                           (1, 1, 2, 2, 2), interval_starts, interval_lengths)\n    loss = torch.sum(bev_feat)\n    loss.backward()\n    assert loss == 4.4\n    grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.])\n    grad_depth = torch.from_numpy(grad_depth).float()\n    grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)\n    assert depth.grad.allclose(grad_depth)\n    grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])\n    grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2)\n    assert feat.grad.allclose(grad_feat)\n"
  },
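The CUDA op above sums, for every BEV cell, the depth-weighted features of all frustum points that land in that cell; `interval_starts`/`interval_lengths` simply mark runs of equal `ranks_bev` after sorting. A minimal pure-PyTorch sketch of the same accumulation (before the final permute applied in `bev_pool_v2`); `bev_pool_v2_reference` is a hypothetical helper, not part of the repository, and on the toy inputs of `test_bev_pool_v2` it reproduces the asserted loss of 4.4:

```python
import torch

def bev_pool_v2_reference(depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape):
    """Depth-weighted scatter-add of frustum features into BEV cells.

    depth: (B, N, D, H, W); feat: (B, N, H, W, C); ranks_* index the flattened
    depth / feature / output tensors, as in QuickCumsumCuda.forward.
    """
    c = feat.shape[-1]
    weighted = feat.reshape(-1, c)[ranks_feat.long()] * \
        depth.reshape(-1)[ranks_depth.long()].unsqueeze(-1)
    out = feat.new_zeros(bev_feat_shape).view(-1, c)
    out.index_add_(0, ranks_bev.long(), weighted)
    return out.view(bev_feat_shape)  # (B, Z, Y, X, C), not yet permuted to (B, C, Z, Y, X)
```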
  {
    "path": "mmdet3d/ops/bev_pool_v2/src/bev_pool.cpp",
    "content": "#include <torch/torch.h>\n#include <c10/cuda/CUDAGuard.h>\n\n// CUDA function declarations\nvoid bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat,\n    const int* ranks_depth, const int* ranks_feat, const int* ranks_bev,\n    const int* interval_starts, const int* interval_lengths, float* out);\n\nvoid bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,\n  const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,\n  const int* ranks_bev, const int* interval_starts, const int* interval_lengths,\n  float* depth_grad, float* feat_grad);\n\n\n/*\n  Function: pillar pooling (forward, cuda)\n  Args:\n    depth            : input depth, FloatTensor[n, d, h, w]\n    feat             : input features, FloatTensor[n, h, w, c]\n    out              : output features, FloatTensor[b, c, h_out, w_out]\n    ranks_depth      : depth index of points, IntTensor[n_points]\n    ranks_feat       : feat index of points, IntTensor[n_points]\n    ranks_bev        : output index of points, IntTensor[n_points]\n    interval_lengths : starting position for pooled point, IntTensor[n_intervals]\n    interval_starts  : how many points in each pooled point, IntTensor[n_intervals]\n  Return:\n*/\nvoid bev_pool_v2_forward(\n  const at::Tensor _depth,\n  const at::Tensor _feat,\n  at::Tensor _out,\n  const at::Tensor _ranks_depth,\n  const at::Tensor _ranks_feat,\n  const at::Tensor _ranks_bev,\n  const at::Tensor _interval_lengths,\n  const at::Tensor _interval_starts\n) {\n  int c = _feat.size(4);\n  int n_intervals = _interval_lengths.size(0);\n  const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth));\n  const float* depth = _depth.data_ptr<float>();\n  const float* feat = _feat.data_ptr<float>();\n  const int* ranks_depth = _ranks_depth.data_ptr<int>();\n  const int* ranks_feat = _ranks_feat.data_ptr<int>();\n  const int* ranks_bev = _ranks_bev.data_ptr<int>();\n\n  const int* interval_lengths = _interval_lengths.data_ptr<int>();\n  const int* interval_starts = _interval_starts.data_ptr<int>();\n\n  float* out = _out.data_ptr<float>();\n  bev_pool_v2(\n    c, n_intervals, depth, feat, ranks_depth, ranks_feat,\n    ranks_bev, interval_starts, interval_lengths, out\n  );\n}\n\n\n/*\n  Function: pillar pooling (backward, cuda)\n  Args:\n    out_grad         : grad of output bev feature, FloatTensor[b, c, h_out, w_out]\n    depth_grad       : grad of input depth, FloatTensor[n, d, h, w]\n    feat_grad        : grad of input feature, FloatTensor[n, h, w, c]\n    depth            : input depth, FloatTensor[n, d, h, w]\n    feat             : input features, FloatTensor[n, h, w, c]\n    ranks_depth      : depth index of points, IntTensor[n_points]\n    ranks_feat       : feat index of points, IntTensor[n_points]\n    ranks_bev        : output index of points, IntTensor[n_points]\n    interval_lengths : starting position for pooled point, IntTensor[n_intervals]\n    interval_starts  : how many points in each pooled point, IntTensor[n_intervals]\n*/\nvoid bev_pool_v2_backward(\n  const at::Tensor _out_grad,\n  at::Tensor _depth_grad,\n  at::Tensor _feat_grad,\n  const at::Tensor _depth,\n  const at::Tensor _feat,\n  const at::Tensor _ranks_depth,\n  const at::Tensor _ranks_feat,\n  const at::Tensor _ranks_bev,\n  const at::Tensor _interval_lengths,\n  const at::Tensor _interval_starts\n) {\n  int c = _out_grad.size(4);\n  int n_intervals = _interval_lengths.size(0);\n  const at::cuda::OptionalCUDAGuard 
device_guard(device_of(_out_grad));\n  const float* out_grad = _out_grad.data_ptr<float>();\n  float* depth_grad = _depth_grad.data_ptr<float>();\n  float* feat_grad = _feat_grad.data_ptr<float>();\n  const float* depth = _depth.data_ptr<float>();\n  const float* feat = _feat.data_ptr<float>();\n  const int* ranks_depth = _ranks_depth.data_ptr<int>();\n  const int* ranks_feat = _ranks_feat.data_ptr<int>();\n  const int* ranks_bev = _ranks_bev.data_ptr<int>();\n  const int* interval_lengths = _interval_lengths.data_ptr<int>();\n  const int* interval_starts = _interval_starts.data_ptr<int>();\n\n  bev_pool_v2_grad(\n    c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,\n    ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad\n  );\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"bev_pool_v2_forward\", &bev_pool_v2_forward,\n        \"bev_pool_v2_forward\");\n  m.def(\"bev_pool_v2_backward\", &bev_pool_v2_backward,\n        \"bev_pool_v2_backward\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/bev_pool_v2/src/bev_pool_cuda.cu",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n\n/*\n  Function: pillar pooling\n  Args:\n    c                : number of channels\n    n_intervals      : number of unique points\n    depth            : input depth, FloatTensor[b,n,d,h,w]\n    feat             : input feat, FloatTensor[b,n,h,w,c]\n    ranks_depth      : input index of depth, IntTensor[n]\n    ranks_feat       : input index of feat, IntTensor[n]\n    ranks_bev        : output index, IntTensor[n]\n    interval_lengths : starting position for pooled point, IntTensor[n_intervals]\n    interval_starts  : how many points in each pooled point, IntTensor[n_intervals]\n    out              : output features, FloatTensor[b, d, h, w, c]\n*/\n__global__ void bev_pool_v2_kernel(int c, int n_intervals,\n                                  const float *__restrict__ depth,\n                                  const float *__restrict__ feat,\n                                  const int *__restrict__ ranks_depth,\n                                  const int *__restrict__ ranks_feat,\n                                  const int *__restrict__ ranks_bev,\n                                  const int *__restrict__ interval_starts,\n                                  const int *__restrict__ interval_lengths,\n                                  float* __restrict__ out) {\n  int idx = blockIdx.x * blockDim.x + threadIdx.x;\n  int index = idx / c;\n  int cur_c = idx % c;\n  if (index >= n_intervals) return;\n  int interval_start = interval_starts[index];\n  int interval_length = interval_lengths[index];\n  float psum = 0;\n  const float* cur_depth;\n  const float* cur_feat;\n  for(int i = 0; i < interval_length; i++){\n    cur_depth = depth + ranks_depth[interval_start+i];\n    cur_feat = feat + ranks_feat[interval_start+i] * c + cur_c;\n    psum += *cur_feat * *cur_depth;\n  }\n\n  const int* cur_rank = ranks_bev + interval_start;\n  float* cur_out = out + *cur_rank * c + cur_c;\n  *cur_out = psum;\n}\n\n\n/*\n  Function: pillar pooling backward\n  Args:\n    c                : number of channels\n    n_intervals      : number of unique points\n    out_grad         : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]\n    depth            : input depth, FloatTensor[b,n,d,h,w]\n    feat             : input feat, FloatTensor[b,n,h,w,c]\n    ranks_depth      : input index of depth, IntTensor[n]\n    ranks_feat       : input index of feat, IntTensor[n]\n    ranks_bev        : output index, IntTensor[n]\n    interval_lengths : starting position for pooled point, IntTensor[n_intervals]\n    interval_starts  : how many points in each pooled point, IntTensor[n_intervals]\n    depth_grad       : gradient of the depth fmap, FloatTensor\n    feat_grad        : gradient of the feature fmap, FloatTensor\n*/\n__global__ void bev_pool_grad_kernel(int c, int n_intervals,\n                                  const float *__restrict__ out_grad,\n                                  const float *__restrict__ depth,\n                                  const float *__restrict__ feat,\n                                  const int *__restrict__ ranks_depth,\n                                  const int *__restrict__ ranks_feat,\n                                  const int *__restrict__ ranks_bev,\n                                  const int *__restrict__ interval_starts,\n                                  const int *__restrict__ interval_lengths,\n                                  float* __restrict__ depth_grad,\n                                  float* 
__restrict__ feat_grad) {\n  int idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (idx >= n_intervals) return;\n  int interval_start = interval_starts[idx];\n  int interval_length = interval_lengths[idx];\n\n  const int* cur_rank;\n  const float* cur_out_grad;\n  const float* cur_out_grad_start;\n\n  const float* cur_feat;\n  const float* cur_feat_start;\n  float* cur_depth_grad;\n  float grad_sum;\n  for(int i = 0; i < interval_length; i++){\n    cur_rank = ranks_bev + interval_start + i;\n    cur_out_grad_start = out_grad +  * cur_rank * c;\n    cur_feat_start = feat + ranks_feat[interval_start+i] * c;\n\n    grad_sum = 0;\n    for(int cur_c = 0; cur_c < c; cur_c++){\n      cur_out_grad = cur_out_grad_start + cur_c;\n      cur_feat = cur_feat_start + cur_c;\n      grad_sum += *cur_out_grad * *cur_feat;\n    }\n\n    cur_depth_grad = depth_grad + ranks_depth[interval_start+i];\n    *cur_depth_grad = grad_sum;\n  }\n\n  float* cur_feat_grad;\n  const float* cur_depth;\n  for(int cur_c = 0; cur_c < c; cur_c++){\n    grad_sum = 0;\n    for(int i = 0; i < interval_length; i++){\n      cur_rank = ranks_bev + interval_start + i;\n      cur_out_grad = out_grad + *cur_rank * c + cur_c;\n\n      cur_depth = depth + ranks_depth[interval_start+i];\n      grad_sum += *cur_out_grad * *cur_depth;\n    }\n    cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c ;\n    * cur_feat_grad = grad_sum;\n  }\n}\n\n\n\nvoid bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, const int* ranks_depth,\n  const int* ranks_feat, const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* out) {\n  bev_pool_v2_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(\n    c, n_intervals, depth, feat, ranks_depth, ranks_feat,\n    ranks_bev, interval_starts, interval_lengths, out\n  );\n}\n\nvoid bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,\n  const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,\n  const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* depth_grad, float* feat_grad) {\n  bev_pool_grad_kernel<<<(int)ceil(((double)n_intervals / 256)), 256>>>(\n     c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,\n     ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad\n  );\n}\n"
  },
  {
    "path": "mmdet3d/ops/dgcnn_modules/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .dgcnn_fa_module import DGCNNFAModule\nfrom .dgcnn_fp_module import DGCNNFPModule\nfrom .dgcnn_gf_module import DGCNNGFModule\n\n__all__ = ['DGCNNFAModule', 'DGCNNFPModule', 'DGCNNGFModule']\n"
  },
  {
    "path": "mmdet3d/ops/dgcnn_modules/dgcnn_fa_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch import nn as nn\n\n\nclass DGCNNFAModule(BaseModule):\n    \"\"\"Point feature aggregation module used in DGCNN.\n\n    Aggregate all the features of points.\n\n    Args:\n        mlp_channels (list[int]): List of mlp channels.\n        norm_cfg (dict, optional): Type of normalization method.\n            Defaults to dict(type='BN1d').\n        act_cfg (dict, optional): Type of activation method.\n            Defaults to dict(type='ReLU').\n        init_cfg (dict, optional): Initialization config. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.fp16_enabled = False\n        self.mlps = nn.Sequential()\n        for i in range(len(mlp_channels) - 1):\n            self.mlps.add_module(\n                f'layer{i}',\n                ConvModule(\n                    mlp_channels[i],\n                    mlp_channels[i + 1],\n                    kernel_size=(1, ),\n                    stride=(1, ),\n                    conv_cfg=dict(type='Conv1d'),\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n\n    @force_fp32()\n    def forward(self, points):\n        \"\"\"forward.\n\n        Args:\n            points (List[Tensor]): tensor of the features to be aggregated.\n\n        Returns:\n            Tensor: (B, N, M) M = mlp[-1], tensor of the output points.\n        \"\"\"\n\n        if len(points) > 1:\n            new_points = torch.cat(points[1:], dim=-1)\n            new_points = new_points.transpose(1, 2).contiguous()  # (B, C, N)\n            new_points_copy = new_points\n\n            new_points = self.mlps(new_points)\n\n            new_fa_points = new_points.max(dim=-1, keepdim=True)[0]\n            new_fa_points = new_fa_points.repeat(1, 1, new_points.shape[-1])\n\n            new_points = torch.cat([new_fa_points, new_points_copy], dim=1)\n            new_points = new_points.transpose(1, 2).contiguous()\n        else:\n            new_points = points\n\n        return new_points\n"
  },
  {
    "path": "mmdet3d/ops/dgcnn_modules/dgcnn_fp_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch import nn as nn\n\n\nclass DGCNNFPModule(BaseModule):\n    \"\"\"Point feature propagation module used in DGCNN.\n\n    Propagate the features from one set to another.\n\n    Args:\n        mlp_channels (list[int]): List of mlp channels.\n        norm_cfg (dict, optional): Type of activation method.\n            Defaults to dict(type='BN1d').\n        act_cfg (dict, optional): Type of activation method.\n            Defaults to dict(type='ReLU').\n        init_cfg (dict, optional): Initialization config. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.fp16_enabled = False\n        self.mlps = nn.Sequential()\n        for i in range(len(mlp_channels) - 1):\n            self.mlps.add_module(\n                f'layer{i}',\n                ConvModule(\n                    mlp_channels[i],\n                    mlp_channels[i + 1],\n                    kernel_size=(1, ),\n                    stride=(1, ),\n                    conv_cfg=dict(type='Conv1d'),\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n\n    @force_fp32()\n    def forward(self, points):\n        \"\"\"forward.\n\n        Args:\n            points (Tensor): (B, N, C) tensor of the input points.\n\n        Returns:\n            Tensor: (B, N, M) M = mlp[-1], tensor of the new points.\n        \"\"\"\n\n        if points is not None:\n            new_points = points.transpose(1, 2).contiguous()  # (B, C, N)\n            new_points = self.mlps(new_points)\n            new_points = new_points.transpose(1, 2).contiguous()\n        else:\n            new_points = points\n\n        return new_points\n"
  },
  {
    "path": "mmdet3d/ops/dgcnn_modules/dgcnn_gf_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops.group_points import GroupAll, QueryAndGroup, grouping_operation\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\n\nclass BaseDGCNNGFModule(nn.Module):\n    \"\"\"Base module for point graph feature module used in DGCNN.\n\n    Args:\n        radii (list[float]): List of radius in each knn or ball query.\n        sample_nums (list[int]): Number of samples in each knn or ball query.\n        mlp_channels (list[list[int]]): Specify of the dgcnn before\n            the global pooling for each graph feature module.\n        knn_modes (list[str], optional): Type of KNN method, valid mode\n            ['F-KNN', 'D-KNN'], Defaults to ['F-KNN'].\n        dilated_group (bool, optional): Whether to use dilated ball query.\n            Defaults to False.\n        use_xyz (bool, optional): Whether to use xyz as point features.\n            Defaults to True.\n        pool_mode (str, optional): Type of pooling method. Defaults to 'max'.\n        normalize_xyz (bool, optional): If ball query, whether to normalize\n            local XYZ with radius. Defaults to False.\n        grouper_return_grouped_xyz (bool, optional): Whether to return grouped\n            xyz in `QueryAndGroup`. Defaults to False.\n        grouper_return_grouped_idx (bool, optional): Whether to return grouped\n            idx in `QueryAndGroup`. Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 radii,\n                 sample_nums,\n                 mlp_channels,\n                 knn_modes=['F-KNN'],\n                 dilated_group=False,\n                 use_xyz=True,\n                 pool_mode='max',\n                 normalize_xyz=False,\n                 grouper_return_grouped_xyz=False,\n                 grouper_return_grouped_idx=False):\n        super(BaseDGCNNGFModule, self).__init__()\n\n        assert len(sample_nums) == len(\n            mlp_channels\n        ), 'Num_samples and mlp_channels should have the same length.'\n        assert pool_mode in ['max', 'avg'\n                             ], \"Pool_mode should be one of ['max', 'avg'].\"\n        assert isinstance(knn_modes, list) or isinstance(\n            knn_modes, tuple), 'The type of knn_modes should be list or tuple.'\n\n        if isinstance(mlp_channels, tuple):\n            mlp_channels = list(map(list, mlp_channels))\n        self.mlp_channels = mlp_channels\n\n        self.pool_mode = pool_mode\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        self.knn_modes = knn_modes\n\n        for i in range(len(sample_nums)):\n            sample_num = sample_nums[i]\n            if sample_num is not None:\n                if self.knn_modes[i] == 'D-KNN':\n                    grouper = QueryAndGroup(\n                        radii[i],\n                        sample_num,\n                        use_xyz=use_xyz,\n                        normalize_xyz=normalize_xyz,\n                        return_grouped_xyz=grouper_return_grouped_xyz,\n                        return_grouped_idx=True)\n                else:\n                    grouper = QueryAndGroup(\n                        radii[i],\n                        sample_num,\n                        use_xyz=use_xyz,\n                        normalize_xyz=normalize_xyz,\n                        return_grouped_xyz=grouper_return_grouped_xyz,\n                        return_grouped_idx=grouper_return_grouped_idx)\n      
      else:\n                grouper = GroupAll(use_xyz)\n            self.groupers.append(grouper)\n\n    def _pool_features(self, features):\n        \"\"\"Perform feature aggregation using pooling operation.\n\n        Args:\n            features (torch.Tensor): (B, C, N, K)\n                Features of locally grouped points before pooling.\n\n        Returns:\n            torch.Tensor: (B, C, N)\n                Pooled features aggregating local information.\n        \"\"\"\n        if self.pool_mode == 'max':\n            # (B, C, N, 1)\n            new_features = F.max_pool2d(\n                features, kernel_size=[1, features.size(3)])\n        elif self.pool_mode == 'avg':\n            # (B, C, N, 1)\n            new_features = F.avg_pool2d(\n                features, kernel_size=[1, features.size(3)])\n        else:\n            raise NotImplementedError\n\n        return new_features.squeeze(-1).contiguous()\n\n    def forward(self, points):\n        \"\"\"forward.\n\n        Args:\n            points (Tensor): (B, N, C) input points.\n\n        Returns:\n            List[Tensor]: (B, N, C1) new points generated from each graph\n                feature module.\n        \"\"\"\n        new_points_list = [points]\n\n        for i in range(len(self.groupers)):\n\n            new_points = new_points_list[i]\n            new_points_trans = new_points.transpose(\n                1, 2).contiguous()  # (B, C, N)\n\n            if self.knn_modes[i] == 'D-KNN':\n                # (B, N, C) -> (B, N, K)\n                idx = self.groupers[i](new_points[..., -3:].contiguous(),\n                                       new_points[..., -3:].contiguous())[-1]\n\n                grouped_results = grouping_operation(\n                    new_points_trans, idx)  # (B, C, N) -> (B, C, N, K)\n                grouped_results -= new_points_trans.unsqueeze(-1)\n            else:\n                grouped_results = self.groupers[i](\n                    new_points, new_points)  # (B, N, C) -> (B, C, N, K)\n\n            new_points = new_points_trans.unsqueeze(-1).repeat(\n                1, 1, 1, grouped_results.shape[-1])\n            new_points = torch.cat([grouped_results, new_points], dim=1)\n\n            # (B, mlp[-1], N, K)\n            new_points = self.mlps[i](new_points)\n\n            # (B, mlp[-1], N)\n            new_points = self._pool_features(new_points)\n            new_points = new_points.transpose(1, 2).contiguous()\n            new_points_list.append(new_points)\n\n        return new_points\n\n\nclass DGCNNGFModule(BaseDGCNNGFModule):\n    \"\"\"Point graph feature module used in DGCNN.\n\n    Args:\n        mlp_channels (list[int]): Specify of the dgcnn before\n            the global pooling for each graph feature module.\n        num_sample (int, optional): Number of samples in each knn or ball\n            query. Defaults to None.\n        knn_mode (str, optional): Type of KNN method, valid mode\n            ['F-KNN', 'D-KNN']. 
Defaults to 'F-KNN'.\n        radius (float, optional): Radius to group with.\n            Defaults to None.\n        dilated_group (bool, optional): Whether to use dilated ball query.\n            Defaults to False.\n        norm_cfg (dict, optional): Type of normalization method.\n            Defaults to dict(type='BN2d').\n        act_cfg (dict, optional): Type of activation method.\n            Defaults to dict(type='ReLU').\n        use_xyz (bool, optional): Whether to use xyz as point features.\n            Defaults to True.\n        pool_mode (str, optional): Type of pooling method.\n            Defaults to 'max'.\n        normalize_xyz (bool, optional): If ball query, whether to normalize\n            local XYZ with radius. Defaults to False.\n        bias (bool | str, optional): If specified as `auto`, it will be decided\n            by the norm_cfg. Bias will be set as True if `norm_cfg` is None,\n            otherwise False. Defaults to 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 num_sample=None,\n                 knn_mode='F-KNN',\n                 radius=None,\n                 dilated_group=False,\n                 norm_cfg=dict(type='BN2d'),\n                 act_cfg=dict(type='ReLU'),\n                 use_xyz=True,\n                 pool_mode='max',\n                 normalize_xyz=False,\n                 bias='auto'):\n        super(DGCNNGFModule, self).__init__(\n            mlp_channels=[mlp_channels],\n            sample_nums=[num_sample],\n            knn_modes=[knn_mode],\n            radii=[radius],\n            use_xyz=use_xyz,\n            pool_mode=pool_mode,\n            normalize_xyz=normalize_xyz,\n            dilated_group=dilated_group)\n\n        for i in range(len(self.mlp_channels)):\n            mlp_channel = self.mlp_channels[i]\n\n            mlp = nn.Sequential()\n            for i in range(len(mlp_channel) - 1):\n                mlp.add_module(\n                    f'layer{i}',\n                    ConvModule(\n                        mlp_channel[i],\n                        mlp_channel[i + 1],\n                        kernel_size=(1, 1),\n                        stride=(1, 1),\n                        conv_cfg=dict(type='Conv2d'),\n                        norm_cfg=norm_cfg,\n                        act_cfg=act_cfg,\n                        bias=bias))\n            self.mlps.append(mlp)\n"
  },
  {
    "path": "mmdet3d/ops/norm.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import NORM_LAYERS\nfrom mmcv.runner import force_fp32\nfrom torch import distributed as dist\nfrom torch import nn as nn\nfrom torch.autograd.function import Function\n\n\nclass AllReduce(Function):\n\n    @staticmethod\n    def forward(ctx, input):\n        input_list = [\n            torch.zeros_like(input) for k in range(dist.get_world_size())\n        ]\n        # Use allgather instead of allreduce in-place operations is unreliable\n        dist.all_gather(input_list, input, async_op=False)\n        inputs = torch.stack(input_list, dim=0)\n        return torch.sum(inputs, dim=0)\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        dist.all_reduce(grad_output, async_op=False)\n        return grad_output\n\n\n@NORM_LAYERS.register_module('naiveSyncBN1d')\nclass NaiveSyncBatchNorm1d(nn.BatchNorm1d):\n    \"\"\"Synchronized Batch Normalization for 3D Tensors.\n\n    Note:\n        This implementation is modified from\n        https://github.com/facebookresearch/detectron2/\n\n        `torch.nn.SyncBatchNorm` has known unknown bugs.\n        It produces significantly worse AP (and sometimes goes NaN)\n        when the batch size on each worker is quite different\n        (e.g., when scale augmentation is used).\n        In 3D detection, different workers has points of different shapes,\n        which also cause instability.\n\n        Use this implementation before `nn.SyncBatchNorm` is fixed.\n        It is slower than `nn.SyncBatchNorm`.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.fp16_enabled = False\n\n    # customized normalization layer still needs this decorator\n    # to force the input to be fp32 and the output to be fp16\n    # TODO: make mmcv fp16 utils handle customized norm layers\n    @force_fp32(out_fp16=True)\n    def forward(self, input):\n        \"\"\"\n        Args:\n            input (tensor): Has shape (N, C) or (N, C, L), where N is\n                the batch size, C is the number of features or\n                channels, and L is the sequence length\n\n        Returns:\n            tensor: Has shape (N, C) or (N, C, L), has same shape\n            as input.\n        \"\"\"\n        assert input.dtype == torch.float32, \\\n            f'input should be in float32 type, got {input.dtype}'\n        using_dist = dist.is_available() and dist.is_initialized()\n        if (not using_dist) or dist.get_world_size() == 1 \\\n                or not self.training:\n            return super().forward(input)\n        assert input.shape[0] > 0, 'SyncBN does not support empty inputs'\n        is_two_dim = input.dim() == 2\n        if is_two_dim:\n            input = input.unsqueeze(2)\n\n        C = input.shape[1]\n        mean = torch.mean(input, dim=[0, 2])\n        meansqr = torch.mean(input * input, dim=[0, 2])\n\n        vec = torch.cat([mean, meansqr], dim=0)\n        vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size())\n\n        mean, meansqr = torch.split(vec, C)\n        var = meansqr - mean * mean\n        self.running_mean += self.momentum * (\n            mean.detach() - self.running_mean)\n        self.running_var += self.momentum * (var.detach() - self.running_var)\n\n        invstd = torch.rsqrt(var + self.eps)\n        scale = self.weight * invstd\n        bias = self.bias - mean * scale\n        scale = scale.reshape(1, -1, 1)\n        bias = bias.reshape(1, -1, 1)\n        output = 
input * scale + bias\n        if is_two_dim:\n            output = output.squeeze(2)\n        return output\n\n\n@NORM_LAYERS.register_module('naiveSyncBN2d')\nclass NaiveSyncBatchNorm2d(nn.BatchNorm2d):\n    \"\"\"Synchronized Batch Normalization for 4D Tensors.\n\n    Note:\n        This implementation is modified from\n        https://github.com/facebookresearch/detectron2/\n\n        `torch.nn.SyncBatchNorm` has known unknown bugs.\n        It produces significantly worse AP (and sometimes goes NaN)\n        when the batch size on each worker is quite different\n        (e.g., when scale augmentation is used).\n        This phenomenon also occurs when the multi-modality feature fusion\n        modules of multi-modality detectors use SyncBN.\n\n        Use this implementation before `nn.SyncBatchNorm` is fixed.\n        It is slower than `nn.SyncBatchNorm`.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.fp16_enabled = False\n\n    # customized normalization layer still needs this decorator\n    # to force the input to be fp32 and the output to be fp16\n    # TODO: make mmcv fp16 utils handle customized norm layers\n    @force_fp32(out_fp16=True)\n    def forward(self, input):\n        \"\"\"\n        Args:\n            Input (tensor): Feature has shape (N, C, H, W).\n\n        Returns:\n            tensor: Has shape (N, C, H, W), same shape as input.\n        \"\"\"\n        assert input.dtype == torch.float32, \\\n            f'input should be in float32 type, got {input.dtype}'\n        using_dist = dist.is_available() and dist.is_initialized()\n        if (not using_dist) or \\\n                dist.get_world_size() == 1 or \\\n                not self.training:\n            return super().forward(input)\n\n        assert input.shape[0] > 0, 'SyncBN does not support empty inputs'\n        C = input.shape[1]\n        mean = torch.mean(input, dim=[0, 2, 3])\n        meansqr = torch.mean(input * input, dim=[0, 2, 3])\n\n        vec = torch.cat([mean, meansqr], dim=0)\n        vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size())\n\n        mean, meansqr = torch.split(vec, C)\n        var = meansqr - mean * mean\n        self.running_mean += self.momentum * (\n            mean.detach() - self.running_mean)\n        self.running_var += self.momentum * (var.detach() - self.running_var)\n\n        invstd = torch.rsqrt(var + self.eps)\n        scale = self.weight * invstd\n        bias = self.bias - mean * scale\n        scale = scale.reshape(1, -1, 1, 1)\n        bias = bias.reshape(1, -1, 1, 1)\n        return input * scale + bias\n"
  },
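As the note in `norm.py` explains, each worker only has to contribute a per-channel mean and mean-of-squares; after these two vectors are averaged across workers, the variance follows from Var[x] = E[x²] − E[x]². A minimal single-process sketch of that identity (no `torch.distributed` involved):

```python
import torch

# Per-channel statistics as NaiveSyncBatchNorm1d computes them for a (N, C, L) input.
x = torch.randn(8, 4, 16)
mean = x.mean(dim=[0, 2])
meansqr = (x * x).mean(dim=[0, 2])

# Across workers, mean and meansqr would be averaged via AllReduce; the variance
# is then recovered without ever gathering the raw activations.
var = meansqr - mean * mean
assert torch.allclose(var, x.var(dim=[0, 2], unbiased=False), atol=1e-6)
```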
  {
    "path": "mmdet3d/ops/ops_dcnv3/functions/__init__.py",
    "content": "# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\nfrom .dcnv3_func import DCNv3Function, dcnv3_core_pytorch\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/functions/dcnv3_func.py",
    "content": "# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport torch\nimport torch.nn.functional as F\nfrom torch.autograd import Function\nfrom torch.autograd.function import once_differentiable\nfrom torch.cuda.amp import custom_bwd, custom_fwd\nimport DCNv3\n\n\nclass DCNv3Function(Function):\n    @staticmethod\n    @custom_fwd\n    def forward(\n            ctx, input, offset, mask,\n            kernel_h, kernel_w, stride_h, stride_w,\n            pad_h, pad_w, dilation_h, dilation_w,\n            group, group_channels, offset_scale, im2col_step):\n        ctx.kernel_h = kernel_h\n        ctx.kernel_w = kernel_w\n        ctx.stride_h = stride_h\n        ctx.stride_w = stride_w\n        ctx.pad_h = pad_h\n        ctx.pad_w = pad_w\n        ctx.dilation_h = dilation_h\n        ctx.dilation_w = dilation_w\n        ctx.group = group\n        ctx.group_channels = group_channels\n        ctx.offset_scale = offset_scale\n        ctx.im2col_step = im2col_step\n        output = DCNv3.dcnv3_forward(\n            input, offset, mask, kernel_h,\n            kernel_w, stride_h, stride_w, pad_h,\n            pad_w, dilation_h, dilation_w, group,\n            group_channels, offset_scale, ctx.im2col_step)\n        ctx.save_for_backward(input, offset, mask)\n\n        return output\n\n    @staticmethod\n    @once_differentiable\n    @custom_bwd\n    def backward(ctx, grad_output):\n        input, offset, mask = ctx.saved_tensors\n        grad_input, grad_offset, grad_mask = \\\n            DCNv3.dcnv3_backward(\n                input, offset, mask, ctx.kernel_h,\n                ctx.kernel_w, ctx.stride_h, ctx.stride_w, ctx.pad_h,\n                ctx.pad_w, ctx.dilation_h, ctx.dilation_w, ctx.group,\n                ctx.group_channels, ctx.offset_scale, grad_output.contiguous(), ctx.im2col_step)\n\n        return grad_input, grad_offset, grad_mask, \\\n            None, None, None, None, None, None, None, None, None, None, None, None\n\n    @staticmethod\n    def symbolic(g, input, offset, mask, kernel_h, kernel_w, stride_h,\n                 stride_w, pad_h, pad_w, dilation_h, dilation_w, group,\n                 group_channels, offset_scale, im2col_step):\n        \"\"\"Symbolic function for mmdeploy::DCNv3.\n\n        Returns:\n            DCNv3 op for onnx.\n        \"\"\"\n        return g.op(\n            'mmdeploy::TRTDCNv3',\n            input,\n            offset,\n            mask,\n            kernel_h_i=int(kernel_h),\n            kernel_w_i=int(kernel_w),\n            stride_h_i=int(stride_h),\n            stride_w_i=int(stride_w),\n            pad_h_i=int(pad_h),\n            pad_w_i=int(pad_w),\n            dilation_h_i=int(dilation_h),\n            dilation_w_i=int(dilation_w),\n            group_i=int(group),\n            group_channels_i=int(group_channels),\n            offset_scale_f=float(offset_scale),\n            im2col_step_i=int(im2col_step),\n        )\n\ndef _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1):\n    _, H_, W_, _ = spatial_shapes\n    H_out = (H_ - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1\n    W_out = (W_ - (dilation_w * (kernel_w - 1) + 1)) // stride_w + 1\n\n    ref_y, 
ref_x = torch.meshgrid(\n        torch.linspace(\n            # pad_h + 0.5,\n            # H_ - pad_h - 0.5,\n            (dilation_h * (kernel_h - 1)) // 2 + 0.5,\n            (dilation_h * (kernel_h - 1)) // 2 + 0.5 + (H_out - 1) * stride_h,\n            H_out,\n            dtype=torch.float32,\n            device=device),\n        torch.linspace(\n            # pad_w + 0.5,\n            # W_ - pad_w - 0.5,\n            (dilation_w * (kernel_w - 1)) // 2 + 0.5,\n            (dilation_w * (kernel_w - 1)) // 2 + 0.5 + (W_out - 1) * stride_w,\n            W_out,\n            dtype=torch.float32,\n            device=device))\n    ref_y = ref_y.reshape(-1)[None] / H_\n    ref_x = ref_x.reshape(-1)[None] / W_\n\n    ref = torch.stack((ref_x, ref_y), -1).reshape(\n        1, H_out, W_out, 1, 2)\n\n    return ref\n\n\ndef _generate_dilation_grids(spatial_shapes, kernel_h, kernel_w, dilation_h, dilation_w, group, device):\n    _, H_, W_, _ = spatial_shapes\n    points_list = []\n    x, y = torch.meshgrid(\n        torch.linspace(\n            -((dilation_w * (kernel_w - 1)) // 2),\n            -((dilation_w * (kernel_w - 1)) // 2) +\n            (kernel_w - 1) * dilation_w, kernel_w,\n            dtype=torch.float32,\n            device=device),\n        torch.linspace(\n            -((dilation_h * (kernel_h - 1)) // 2),\n            -((dilation_h * (kernel_h - 1)) // 2) +\n            (kernel_h - 1) * dilation_h, kernel_h,\n            dtype=torch.float32,\n            device=device))\n\n    points_list.extend([x / W_, y / H_])\n    grid = torch.stack(points_list, -1).reshape(-1, 1, 2).\\\n        repeat(1, group, 1).permute(1, 0, 2)\n    grid = grid.reshape(1, 1, 1, group * kernel_h * kernel_w, 2)\n\n    return grid\n\n\ndef dcnv3_core_pytorch(\n        input, offset, mask, kernel_h,\n        kernel_w, stride_h, stride_w, pad_h,\n        pad_w, dilation_h, dilation_w, group,\n        group_channels, offset_scale):\n    # for debug and test only,\n    # need to use cuda version instead\n    input = F.pad(\n        input,\n        [0, 0, pad_h, pad_h, pad_w, pad_w])\n    N_, H_in, W_in, _ = input.shape\n    _, H_out, W_out, _ = offset.shape\n\n    ref = _get_reference_points(\n        input.shape, input.device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w)\n    grid = _generate_dilation_grids(\n        input.shape, kernel_h, kernel_w, dilation_h, dilation_w, group, input.device)\n    spatial_norm = torch.tensor([W_in, H_in]).reshape(1, 1, 1, 2).\\\n        repeat(1, 1, 1, group*kernel_h*kernel_w).to(input.device)\n\n    sampling_locations = (ref + grid * offset_scale).repeat(N_, 1, 1, 1, 1).flatten(3, 4) + \\\n        offset * offset_scale / spatial_norm\n\n    P_ = kernel_h * kernel_w\n    sampling_grids = 2 * sampling_locations - 1\n    # N_, H_in, W_in, group*group_channels -> N_, H_in*W_in, group*group_channels -> N_, group*group_channels, H_in*W_in -> N_*group, group_channels, H_in, W_in\n    input_ = input.view(N_, H_in*W_in, group*group_channels).transpose(1, 2).\\\n        reshape(N_*group, group_channels, H_in, W_in)\n    # N_, H_out, W_out, group*P_*2 -> N_, H_out*W_out, group, P_, 2 -> N_, group, H_out*W_out, P_, 2 -> N_*group, H_out*W_out, P_, 2\n    sampling_grid_ = sampling_grids.view(N_, H_out*W_out, group, P_, 2).transpose(1, 2).\\\n        flatten(0, 1)\n    # N_*group, group_channels, H_out*W_out, P_\n    sampling_input_ = F.grid_sample(\n        input_, sampling_grid_, mode='bilinear', padding_mode='zeros', align_corners=False)\n\n    # (N_, 
H_out, W_out, group*P_) -> N_, H_out*W_out, group, P_ -> (N_, group, H_out*W_out, P_) -> (N_*group, 1, H_out*W_out, P_)\n    mask = mask.view(N_, H_out*W_out, group, P_).transpose(1, 2).\\\n        reshape(N_*group, 1, H_out*W_out, P_)\n    output = (sampling_input_ * mask).sum(-1).view(N_,\n                                                   group*group_channels, H_out*W_out)\n\n    return output.transpose(1, 2).reshape(N_, H_out, W_out, -1).contiguous()\n"
  },
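`dcnv3_core_pytorch` is the debug/reference path of the operator: the predicted offsets displace a regular (dilated) sampling grid, `grid_sample` gathers the shifted taps per group, and the softmax-normalised mask re-weights them. A minimal shape-check sketch, assuming the compiled `DCNv3` extension is importable (the module imports it at load time); the tensor sizes are illustrative only:

```python
import torch
from mmdet3d.ops.ops_dcnv3.functions import dcnv3_core_pytorch

N, H, W, C, group, k = 1, 8, 8, 16, 4, 3
x = torch.randn(N, H, W, C)                                  # channels-last input
offset = torch.zeros(N, H, W, group * k * k * 2)             # zero offsets -> regular 3x3 grid
mask = torch.full((N, H, W, group * k * k), 1.0 / (k * k))   # uniform modulation over the taps

out = dcnv3_core_pytorch(
    x, offset, mask,
    k, k,        # kernel_h, kernel_w
    1, 1,        # stride_h, stride_w
    1, 1,        # pad_h, pad_w
    1, 1,        # dilation_h, dilation_w
    group, C // group,
    offset_scale=1.0)
print(out.shape)  # torch.Size([1, 8, 8, 16]): spatial size is preserved with this config
```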
  {
    "path": "mmdet3d/ops/ops_dcnv3/make.sh",
    "content": "#!/usr/bin/env bash\n# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\npython setup.py build install\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/modules/__init__.py",
    "content": "# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\nfrom .dcnv3 import DCNv3, DCNv3_pytorch"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/modules/dcnv3.py",
    "content": "# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport warnings\nimport torch\nfrom torch import nn\nimport torch.nn.functional as F\nfrom torch.nn.init import xavier_uniform_, constant_\nfrom ..functions import DCNv3Function, dcnv3_core_pytorch\n\n\nclass to_channels_first(nn.Module):\n\n    def __init__(self):\n        super().__init__()\n\n    def forward(self, x):\n        return x.permute(0, 3, 1, 2)\n\n\nclass to_channels_last(nn.Module):\n\n    def __init__(self):\n        super().__init__()\n\n    def forward(self, x):\n        return x.permute(0, 2, 3, 1)\n\n\ndef build_norm_layer(dim,\n                     norm_layer,\n                     in_format='channels_last',\n                     out_format='channels_last',\n                     eps=1e-6):\n    layers = []\n    if norm_layer == 'BN':\n        if in_format == 'channels_last':\n            layers.append(to_channels_first())\n        layers.append(nn.BatchNorm2d(dim))\n        if out_format == 'channels_last':\n            layers.append(to_channels_last())\n    elif norm_layer == 'LN':\n        if in_format == 'channels_first':\n            layers.append(to_channels_last())\n        layers.append(nn.LayerNorm(dim, eps=eps))\n        if out_format == 'channels_first':\n            layers.append(to_channels_first())\n    else:\n        raise NotImplementedError(\n            f'build_norm_layer does not support {norm_layer}')\n    return nn.Sequential(*layers)\n\n\ndef build_act_layer(act_layer):\n    if act_layer == 'ReLU':\n        return nn.ReLU(inplace=True)\n    elif act_layer == 'SiLU':\n        return nn.SiLU(inplace=True)\n    elif act_layer == 'GELU':\n        return nn.GELU()\n\n    raise NotImplementedError(f'build_act_layer does not support {act_layer}')\n\n\ndef _is_power_of_2(n):\n    if (not isinstance(n, int)) or (n < 0):\n        raise ValueError(\n            \"invalid input for _is_power_of_2: {} (type: {})\".format(n, type(n)))\n\n    return (n & (n - 1) == 0) and n != 0\n\n\nclass CenterFeatureScaleModule(nn.Module):\n    def forward(self,\n                query,\n                center_feature_scale_proj_weight,\n                center_feature_scale_proj_bias):\n        center_feature_scale = F.linear(query,\n                                        weight=center_feature_scale_proj_weight,\n                                        bias=center_feature_scale_proj_bias).sigmoid()\n        return center_feature_scale\n\n\nclass DCNv3_pytorch(nn.Module):\n    def __init__(\n            self,\n            channels=64,\n            kernel_size=3,\n            dw_kernel_size=None,\n            stride=1,\n            pad=1,\n            dilation=1,\n            group=4,\n            offset_scale=1.0,\n            act_layer='GELU',\n            norm_layer='LN',\n            center_feature_scale=False):\n        \"\"\"\n        DCNv3 Module\n        :param channels\n        :param kernel_size\n        :param stride\n        :param pad\n        :param dilation\n        :param group\n        :param offset_scale\n        :param act_layer\n        :param norm_layer\n        \"\"\"\n        super().__init__()\n        if channels % group != 0:\n            raise ValueError(\n                f'channels must be divisible 
by group, but got {channels} and {group}')\n        _d_per_group = channels // group\n        dw_kernel_size = dw_kernel_size if dw_kernel_size is not None else kernel_size\n        # you'd better set _d_per_group to a power of 2 which is more efficient in our CUDA implementation\n        if not _is_power_of_2(_d_per_group):\n            warnings.warn(\n                \"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 \"\n                \"which is more efficient in our CUDA implementation.\")\n\n        self.offset_scale = offset_scale\n        self.channels = channels\n        self.kernel_size = kernel_size\n        self.dw_kernel_size = dw_kernel_size\n        self.stride = stride\n        self.dilation = dilation\n        self.pad = pad\n        self.group = group\n        self.group_channels = channels // group\n        self.offset_scale = offset_scale\n        self.center_feature_scale = center_feature_scale\n\n        self.dw_conv = nn.Sequential(\n            nn.Conv2d(\n                channels,\n                channels,\n                kernel_size=dw_kernel_size,\n                stride=1,\n                padding=(dw_kernel_size - 1) // 2,\n                groups=channels),\n            build_norm_layer(\n                channels,\n                norm_layer,\n                'channels_first',\n                'channels_last'),\n            build_act_layer(act_layer))\n        self.offset = nn.Linear(\n            channels,\n            group * kernel_size * kernel_size * 2)\n        self.mask = nn.Linear(\n            channels,\n            group * kernel_size * kernel_size)\n        self.input_proj = nn.Linear(channels, channels)\n        self.output_proj = nn.Linear(channels, channels)\n        self._reset_parameters()\n        \n        if center_feature_scale:\n            self.center_feature_scale_proj_weight = nn.Parameter(\n                torch.zeros((group, channels), dtype=torch.float))\n            self.center_feature_scale_proj_bias = nn.Parameter(\n                torch.tensor(0.0, dtype=torch.float).view((1,)).repeat(group, ))\n            self.center_feature_scale_module = CenterFeatureScaleModule()\n\n    def _reset_parameters(self):\n        constant_(self.offset.weight.data, 0.)\n        constant_(self.offset.bias.data, 0.)\n        constant_(self.mask.weight.data, 0.)\n        constant_(self.mask.bias.data, 0.)\n        xavier_uniform_(self.input_proj.weight.data)\n        constant_(self.input_proj.bias.data, 0.)\n        xavier_uniform_(self.output_proj.weight.data)\n        constant_(self.output_proj.bias.data, 0.)\n\n    def forward(self, input):\n        \"\"\"\n        :param query                       (N, H, W, C)\n        :return output                     (N, H, W, C)\n        \"\"\"\n        N, H, W, _ = input.shape\n\n        x = self.input_proj(input)\n        x_proj = x\n\n        x1 = input.permute(0, 3, 1, 2)\n        x1 = self.dw_conv(x1)\n        offset = self.offset(x1)\n        mask = self.mask(x1).reshape(N, H, W, self.group, -1)\n        mask = F.softmax(mask, -1).reshape(N, H, W, -1)\n\n        x = dcnv3_core_pytorch(\n            x, offset, mask,\n            self.kernel_size, self.kernel_size,\n            self.stride, self.stride,\n            self.pad, self.pad,\n            self.dilation, self.dilation,\n            self.group, self.group_channels,\n            self.offset_scale)\n        if self.center_feature_scale:\n            center_feature_scale = 
self.center_feature_scale_module(\n                x1, self.center_feature_scale_proj_weight, self.center_feature_scale_proj_bias)\n            # N, H, W, groups -> N, H, W, groups, 1 -> N, H, W, groups, _d_per_group -> N, H, W, channels\n            center_feature_scale = center_feature_scale[..., None].repeat(\n                1, 1, 1, 1, self.channels // self.group).flatten(-2)\n            x = x * (1 - center_feature_scale) + x_proj * center_feature_scale\n        x = self.output_proj(x)\n\n        return x\n\n\nclass DCNv3(nn.Module):\n    def __init__(\n            self,\n            channels=64,\n            kernel_size=3,\n            dw_kernel_size=None,\n            stride=1,\n            pad=1,\n            dilation=1,\n            group=4,\n            offset_scale=1.0,\n            act_layer='GELU',\n            norm_layer='LN',\n            center_feature_scale=False):\n        \"\"\"\n        DCNv3 Module\n        :param channels\n        :param kernel_size\n        :param stride\n        :param pad\n        :param dilation\n        :param group\n        :param offset_scale\n        :param act_layer\n        :param norm_layer\n        \"\"\"\n        super().__init__()\n        if channels % group != 0:\n            raise ValueError(\n                f'channels must be divisible by group, but got {channels} and {group}')\n        _d_per_group = channels // group\n        dw_kernel_size = dw_kernel_size if dw_kernel_size is not None else kernel_size\n        # you'd better set _d_per_group to a power of 2 which is more efficient in our CUDA implementation\n        if not _is_power_of_2(_d_per_group):\n            warnings.warn(\n                \"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 \"\n                \"which is more efficient in our CUDA implementation.\")\n\n        self.offset_scale = offset_scale\n        self.channels = channels\n        self.kernel_size = kernel_size\n        self.dw_kernel_size = dw_kernel_size\n        self.stride = stride\n        self.dilation = dilation\n        self.pad = pad\n        self.group = group\n        self.group_channels = channels // group\n        self.offset_scale = offset_scale\n        self.center_feature_scale = center_feature_scale\n        \n        self.dw_conv = nn.Sequential(\n            nn.Conv2d(\n                channels,\n                channels,\n                kernel_size=dw_kernel_size,\n                stride=1,\n                padding=(dw_kernel_size - 1) // 2,\n                groups=channels),\n            build_norm_layer(\n                channels,\n                norm_layer,\n                'channels_first',\n                'channels_last'),\n            build_act_layer(act_layer))\n        self.offset = nn.Linear(\n            channels,\n            group * kernel_size * kernel_size * 2)\n        self.mask = nn.Linear(\n            channels,\n            group * kernel_size * kernel_size)\n        self.input_proj = nn.Linear(channels, channels)\n        self.output_proj = nn.Linear(channels, channels)\n        self._reset_parameters()\n        \n        if center_feature_scale:\n            self.center_feature_scale_proj_weight = nn.Parameter(\n                torch.zeros((group, channels), dtype=torch.float))\n            self.center_feature_scale_proj_bias = nn.Parameter(\n                torch.tensor(0.0, dtype=torch.float).view((1,)).repeat(group, ))\n            self.center_feature_scale_module = CenterFeatureScaleModule()\n\n    def 
_reset_parameters(self):\n        constant_(self.offset.weight.data, 0.)\n        constant_(self.offset.bias.data, 0.)\n        constant_(self.mask.weight.data, 0.)\n        constant_(self.mask.bias.data, 0.)\n        xavier_uniform_(self.input_proj.weight.data)\n        constant_(self.input_proj.bias.data, 0.)\n        xavier_uniform_(self.output_proj.weight.data)\n        constant_(self.output_proj.bias.data, 0.)\n\n    def forward(self, input):\n        \"\"\"\n        :param query                       (N, H, W, C)\n        :return output                     (N, H, W, C)\n        \"\"\"\n        N, H, W, _ = input.shape\n\n        x = self.input_proj(input)\n        x_proj = x\n        dtype = x.dtype\n\n        x1 = input.permute(0, 3, 1, 2)\n        x1 = self.dw_conv(x1)\n        offset = self.offset(x1)\n        mask = self.mask(x1).reshape(N, H, W, self.group, -1)\n        mask = F.softmax(mask, -1).reshape(N, H, W, -1).type(dtype)\n\n        x = DCNv3Function.apply(\n            x, offset, mask,\n            self.kernel_size, self.kernel_size,\n            self.stride, self.stride,\n            self.pad, self.pad,\n            self.dilation, self.dilation,\n            self.group, self.group_channels,\n            self.offset_scale,\n            256)\n        \n        if self.center_feature_scale:\n            center_feature_scale = self.center_feature_scale_module(\n                x1, self.center_feature_scale_proj_weight, self.center_feature_scale_proj_bias)\n            # N, H, W, groups -> N, H, W, groups, 1 -> N, H, W, groups, _d_per_group -> N, H, W, channels\n            center_feature_scale = center_feature_scale[..., None].repeat(\n                1, 1, 1, 1, self.channels // self.group).flatten(-2)\n            x = x * (1 - center_feature_scale) + x_proj * center_feature_scale\n        x = self.output_proj(x)\n\n        return x\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/setup.py",
    "content": "# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\nimport os\nimport glob\n\nimport torch\n\nfrom torch.utils.cpp_extension import CUDA_HOME\nfrom torch.utils.cpp_extension import CppExtension\nfrom torch.utils.cpp_extension import CUDAExtension\n\nfrom setuptools import find_packages\nfrom setuptools import setup\n\nrequirements = [\"torch\", \"torchvision\"]\n\n\ndef get_extensions():\n    this_dir = os.path.dirname(os.path.abspath(__file__))\n    extensions_dir = os.path.join(this_dir, \"src\")\n\n    main_file = glob.glob(os.path.join(extensions_dir, \"*.cpp\"))\n    source_cpu = glob.glob(os.path.join(extensions_dir, \"cpu\", \"*.cpp\"))\n    source_cuda = glob.glob(os.path.join(extensions_dir, \"cuda\", \"*.cu\"))\n\n    sources = main_file + source_cpu\n    extension = CppExtension\n    extra_compile_args = {\"cxx\": []}\n    define_macros = []\n\n    if torch.cuda.is_available() and CUDA_HOME is not None:\n        extension = CUDAExtension\n        sources += source_cuda\n        define_macros += [(\"WITH_CUDA\", None)]\n        extra_compile_args[\"nvcc\"] = [\n            # \"-DCUDA_HAS_FP16=1\",\n            # \"-D__CUDA_NO_HALF_OPERATORS__\",\n            # \"-D__CUDA_NO_HALF_CONVERSIONS__\",\n            # \"-D__CUDA_NO_HALF2_OPERATORS__\",\n        ]\n    else:\n        raise NotImplementedError('Cuda is not availabel')\n\n    sources = [os.path.join(extensions_dir, s) for s in sources]\n    include_dirs = [extensions_dir]\n    ext_modules = [\n        extension(\n            \"DCNv3\",\n            sources,\n            include_dirs=include_dirs,\n            define_macros=define_macros,\n            extra_compile_args=extra_compile_args,\n        )\n    ]\n    return ext_modules\n\n\nsetup(\n    name=\"DCNv3\",\n    version=\"1.0\",\n    author=\"InternImage\",\n    url=\"https://github.com/OpenGVLab/InternImage\",\n    description=\n    \"PyTorch Wrapper for CUDA Functions of DCNv3\",\n    packages=find_packages(exclude=(\n        \"configs\",\n        \"tests\",\n    )),\n    ext_modules=get_extensions(),\n    cmdclass={\"build_ext\": torch.utils.cpp_extension.BuildExtension},\n)\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/cpu/dcnv3_cpu.cpp",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include <vector>\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\nat::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset,\n                             const at::Tensor &mask, const int kernel_h,\n                             const int kernel_w, const int stride_h,\n                             const int stride_w, const int pad_h,\n                             const int pad_w, const int dilation_h,\n                             const int dilation_w, const int group,\n                             const int group_channels, const float offset_scale,\n                             const int im2col_step) {\n    AT_ERROR(\"Not implement on cpu\");\n}\n\nstd::vector<at::Tensor>\ndcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset,\n                   const at::Tensor &mask, const int kernel_h,\n                   const int kernel_w, const int stride_h, const int stride_w,\n                   const int pad_h, const int pad_w, const int dilation_h,\n                   const int dilation_w, const int group,\n                   const int group_channels, const float offset_scale,\n                   const at::Tensor &grad_output, const int im2col_step) {\n    AT_ERROR(\"Not implement on cpu\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/cpu/dcnv3_cpu.h",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset,\n                             const at::Tensor &mask, const int kernel_h,\n                             const int kernel_w, const int stride_h,\n                             const int stride_w, const int pad_h,\n                             const int pad_w, const int dilation_h,\n                             const int dilation_w, const int group,\n                             const int group_channels, const float offset_scale,\n                             const int im2col_step);\n\nstd::vector<at::Tensor>\ndcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset,\n                   const at::Tensor &mask, const int kernel_h,\n                   const int kernel_w, const int stride_h, const int stride_w,\n                   const int pad_h, const int pad_w, const int dilation_h,\n                   const int dilation_w, const int group,\n                   const int group_channels, const float offset_scale,\n                   const at::Tensor &grad_output, const int im2col_step);\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/cuda/dcnv3_cuda.cu",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include \"cuda/dcnv3_im2col_cuda.cuh\"\n#include <vector>\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <cuda.h>\n#include <cuda_runtime.h>\n#include <torch/torch.h>\n\nat::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset,\n                              const at::Tensor &mask, const int kernel_h,\n                              const int kernel_w, const int stride_h,\n                              const int stride_w, const int pad_h,\n                              const int pad_w, const int dilation_h,\n                              const int dilation_w, const int group,\n                              const int group_channels,\n                              const float offset_scale, const int im2col_step) {\n    AT_ASSERTM(input.is_contiguous(), \"input tensor has to be contiguous\");\n    AT_ASSERTM(offset.is_contiguous(), \"offset tensor has to be contiguous\");\n    AT_ASSERTM(mask.is_contiguous(), \"mask tensor has to be contiguous\");\n    AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n    AT_ASSERTM(offset.type().is_cuda(), \"offset must be a CUDA tensor\");\n    AT_ASSERTM(mask.type().is_cuda(), \"mask must be a CUDA tensor\");\n\n    const int batch = input.size(0);\n    const int height_in = input.size(1);\n    const int width_in = input.size(2);\n    const int channels = input.size(3);\n    const int height_out =\n        (height_in + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h +\n        1;\n    const int width_out =\n        (width_in + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w +\n        1;\n    const int im2col_step_ = std::min(batch, im2col_step);\n\n    AT_ASSERTM(batch % im2col_step_ == 0,\n               \"batch(%d) must divide im2col_step(%d)\", batch, im2col_step_);\n    AT_ASSERTM(\n        channels == (group * group_channels),\n        \"Input channels and group times group channels wont match: (%d vs %d).\",\n        channels, group * group_channels);\n\n    auto output =\n        at::zeros({batch, height_out, width_out, group * group_channels},\n                  input.options());\n\n    const int batch_n = im2col_step_;\n    auto output_n = output.view({batch / batch_n, batch_n, height_out,\n                                 width_out, group * group_channels});\n    auto per_input_size = height_in * width_in * group * group_channels;\n    auto per_offset_size =\n        height_out * width_out * group * kernel_h * kernel_w * 2;\n    auto per_mask_size = height_out * width_out * group * kernel_h * kernel_w;\n    for (int n = 0; n < batch / im2col_step_; ++n) {\n        auto columns = output_n.select(0, n);\n        // AT_DISPATCH_FLOATING_TYPES(\n        AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n            input.type(), \"ms_deform_attn_forward_cuda\", ([&] {\n                dcnv3_im2col_cuda(\n                    at::cuda::getCurrentCUDAStream(),\n                    input.data<scalar_t>() + n * im2col_step_ * per_input_size,\n             
       offset.data<scalar_t>() +\n                        n * im2col_step_ * per_offset_size,\n                    mask.data<scalar_t>() + n * im2col_step_ * per_mask_size,\n                    columns.data<scalar_t>(), kernel_h, kernel_w, stride_h,\n                    stride_w, pad_h, pad_w, dilation_h, dilation_w, group,\n                    group_channels, batch_n, height_in, width_in, height_out,\n                    width_out, offset_scale);\n            }));\n    }\n\n    return output;\n}\n\nstd::vector<at::Tensor>\ndcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset,\n                    const at::Tensor &mask, const int kernel_h,\n                    const int kernel_w, const int stride_h, const int stride_w,\n                    const int pad_h, const int pad_w, const int dilation_h,\n                    const int dilation_w, const int group,\n                    const int group_channels, const float offset_scale,\n                    const at::Tensor &grad_output, const int im2col_step) {\n\n    AT_ASSERTM(input.is_contiguous(), \"input tensor has to be contiguous\");\n    AT_ASSERTM(offset.is_contiguous(), \"offset tensor has to be contiguous\");\n    AT_ASSERTM(mask.is_contiguous(), \"mask tensor has to be contiguous\");\n    AT_ASSERTM(grad_output.is_contiguous(),\n               \"grad_output tensor has to be contiguous\");\n    AT_ASSERTM(input.type().is_cuda(), \"input must be a CUDA tensor\");\n    AT_ASSERTM(offset.type().is_cuda(), \"offset must be a CUDA tensor\");\n    AT_ASSERTM(mask.type().is_cuda(), \"mask must be a CUDA tensor\");\n    AT_ASSERTM(grad_output.type().is_cuda(),\n               \"grad_output must be a CUDA tensor\");\n\n    const int batch = input.size(0);\n    const int height_in = input.size(1);\n    const int width_in = input.size(2);\n    const int channels = input.size(3);\n    const int height_out =\n        (height_in + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h +\n        1;\n    const int width_out =\n        (width_in + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w +\n        1;\n    const int im2col_step_ = std::min(batch, im2col_step);\n\n    AT_ASSERTM(batch % im2col_step_ == 0,\n               \"batch(%d) must divide im2col_step(%d)\", batch, im2col_step_);\n    AT_ASSERTM(\n        channels == (group * group_channels),\n        \"Input channels and group times group channels wont match: (%d vs %d).\",\n        channels, group * group_channels);\n\n    auto dtype = input.dtype();\n    if (dtype == at::kHalf) {\n        dtype = at::kFloat;\n    }\n\n    auto grad_input = at::zeros_like(input, dtype);\n    auto grad_offset = at::zeros_like(offset, dtype);\n    auto grad_mask = at::zeros_like(mask, dtype);\n\n    const int batch_n = im2col_step_;\n    auto per_input_size = height_in * width_in * group * group_channels;\n    auto per_offset_size =\n        height_out * width_out * group * kernel_h * kernel_w * 2;\n    auto per_mask_size = height_out * width_out * group * kernel_h * kernel_w;\n    auto grad_output_n =\n        grad_output.view({batch / im2col_step_, batch_n, height_out * width_out,\n                          group, group_channels});\n\n    for (int n = 0; n < batch / im2col_step_; ++n) {\n        auto grad_output_g = grad_output_n.select(0, n);\n        // AT_DISPATCH_FLOATING_TYPES(\n        AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n            input.type(), \"ms_deform_attn_backward_cuda\", ([&] {\n                dcnv3_col2im_cuda(\n                    
at::cuda::getCurrentCUDAStream(),\n                    grad_output_g.data<scalar_t>(),\n                    input.data<scalar_t>() + n * im2col_step_ * per_input_size,\n                    offset.data<scalar_t>() +\n                        n * im2col_step_ * per_offset_size,\n                    mask.data<scalar_t>() + n * im2col_step_ * per_mask_size,\n                    kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,\n                    dilation_h, dilation_w, group, group_channels, batch_n,\n                    height_in, width_in, height_out, width_out, offset_scale,\n                    grad_input.data<opmath_t>() +\n                        n * im2col_step_ * per_input_size,\n                    grad_offset.data<opmath_t>() +\n                        n * im2col_step_ * per_offset_size,\n                    grad_mask.data<opmath_t>() +\n                        n * im2col_step_ * per_mask_size);\n            }));\n    }\n\n    if (input.dtype() == torch::kHalf) {\n        return {grad_input.to(torch::kHalf), grad_offset.to(torch::kHalf),\n                grad_mask.to(torch::kHalf)};\n    } else {\n        return {grad_input, grad_offset, grad_mask};\n    }\n}"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/cuda/dcnv3_cuda.h",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset,\n                              const at::Tensor &mask, const int kernel_h,\n                              const int kernel_w, const int stride_h,\n                              const int stride_w, const int pad_h,\n                              const int pad_w, const int dilation_h,\n                              const int dilation_w, const int group,\n                              const int group_channels,\n                              const float offset_scale, const int im2col_step);\n\nstd::vector<at::Tensor>\ndcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset,\n                    const at::Tensor &mask, const int kernel_h,\n                    const int kernel_w, const int stride_h, const int stride_w,\n                    const int pad_h, const int pad_w, const int dilation_h,\n                    const int dilation_w, const int group,\n                    const int group_channels, const float offset_scale,\n                    const at::Tensor &grad_output, const int im2col_step);\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/cuda/dcnv3_im2col_cuda.cuh",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include <algorithm>\n#include <cstdio>\n#include <cstring>\n\n#include <ATen/ATen.h>\n#include <ATen/OpMathType.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <THC/THCAtomics.cuh>\n\n#define CUDA_KERNEL_LOOP(i, n)                                                 \\\n    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);               \\\n         i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 256;\ninline int GET_BLOCKS(const int N, const int num_threads) {\n    return (N + num_threads - 1) / num_threads;\n}\n\n#define opmath_t at::opmath_type<scalar_t>\n\ntemplate <typename scalar_t>\n__device__ opmath_t dcnv3_im2col_bilinear(const scalar_t *&bottom_data,\n                                          const int &height, const int &width,\n                                          const int &group,\n                                          const int &group_channels,\n                                          const opmath_t &h, const opmath_t &w,\n                                          const int &g, const int &c) {\n    const int h_low = floor(h);\n    const int w_low = floor(w);\n    const int h_high = h_low + 1;\n    const int w_high = w_low + 1;\n\n    const opmath_t lh = h - h_low;\n    const opmath_t lw = w - w_low;\n    const opmath_t hh = 1 - lh, hw = 1 - lw;\n\n    const int w_stride = group * group_channels;\n    const int h_stride = width * w_stride;\n    const int h_low_ptr_offset = h_low * h_stride;\n    const int h_high_ptr_offset = h_low_ptr_offset + h_stride;\n    const int w_low_ptr_offset = w_low * w_stride;\n    const int w_high_ptr_offset = w_low_ptr_offset + w_stride;\n    const int base_ptr = g * group_channels + c;\n\n    opmath_t v1 = 0;\n    if (h_low >= 0 && w_low >= 0) {\n        const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;\n        v1 = bottom_data[ptr1];\n    }\n    opmath_t v2 = 0;\n    if (h_low >= 0 && w_high <= width - 1) {\n        const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;\n        v2 = bottom_data[ptr2];\n    }\n    opmath_t v3 = 0;\n    if (h_high <= height - 1 && w_low >= 0) {\n        const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;\n        v3 = bottom_data[ptr3];\n    }\n    opmath_t v4 = 0;\n    if (h_high <= height - 1 && w_high <= width - 1) {\n        const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;\n        v4 = bottom_data[ptr4];\n    }\n    const opmath_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n\n    const opmath_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n    return val;\n}\n\ntemplate <typename scalar_t>\n__device__ void dcnv3_col2im_bilinear(\n    const scalar_t *&bottom_data, const int &height, const int &width,\n    const int &nheads, const int &group_channels, const opmath_t &h,\n    const opmath_t &w, const int &m, const int &c, const opmath_t offset_scale,\n    const opmath_t &top_grad, const opmath_t &mask, opmath_t *&grad_im,\n    opmath_t *grad_offset, opmath_t *grad_mask) {\n    
const int h_low = floor(h);\n    const int w_low = floor(w);\n    const int h_high = h_low + 1;\n    const int w_high = w_low + 1;\n\n    const opmath_t lh = h - h_low;\n    const opmath_t lw = w - w_low;\n    const opmath_t hh = 1 - lh, hw = 1 - lw;\n\n    const int w_stride = nheads * group_channels;\n    const int h_stride = width * w_stride;\n    const int h_low_ptr_offset = h_low * h_stride;\n    const int h_high_ptr_offset = h_low_ptr_offset + h_stride;\n    const int w_low_ptr_offset = w_low * w_stride;\n    const int w_high_ptr_offset = w_low_ptr_offset + w_stride;\n    const int base_ptr = m * group_channels + c;\n\n    const opmath_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n    const opmath_t top_grad_im = top_grad * mask;\n    opmath_t grad_h_weight = 0, grad_w_weight = 0;\n\n    opmath_t v1 = 0;\n    if (h_low >= 0 && w_low >= 0) {\n        const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;\n        v1 = bottom_data[ptr1];\n        grad_h_weight -= hw * v1;\n        grad_w_weight -= hh * v1;\n        atomicAdd(grad_im + ptr1, w1 * top_grad_im);\n    }\n    opmath_t v2 = 0;\n    if (h_low >= 0 && w_high <= width - 1) {\n        const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;\n        v2 = bottom_data[ptr2];\n        grad_h_weight -= lw * v2;\n        grad_w_weight += hh * v2;\n        atomicAdd(grad_im + ptr2, w2 * top_grad_im);\n    }\n    opmath_t v3 = 0;\n    if (h_high <= height - 1 && w_low >= 0) {\n        const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;\n        v3 = bottom_data[ptr3];\n        grad_h_weight += hw * v3;\n        grad_w_weight -= lh * v3;\n        atomicAdd(grad_im + ptr3, w3 * top_grad_im);\n    }\n    opmath_t v4 = 0;\n    if (h_high <= height - 1 && w_high <= width - 1) {\n        const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;\n        v4 = bottom_data[ptr4];\n        grad_h_weight += lw * v4;\n        grad_w_weight += lh * v4;\n        atomicAdd(grad_im + ptr4, w4 * top_grad_im);\n    }\n\n    const opmath_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n    *grad_mask = top_grad * val;\n    *grad_offset = offset_scale * grad_w_weight * top_grad_im;\n    *(grad_offset + 1) = offset_scale * grad_h_weight * top_grad_im;\n}\n\ntemplate <typename scalar_t>\n__device__ void dcnv3_col2im_bilinear_gm(\n    const scalar_t *&bottom_data, const int &height, const int &width,\n    const int &nheads, const int &group_channels, const opmath_t &h,\n    const opmath_t &w, const int &m, const int &c, const opmath_t offset_scale,\n    const opmath_t &top_grad, const opmath_t &mask, opmath_t *&grad_im,\n    opmath_t *grad_offset, opmath_t *grad_mask) {\n    const int h_low = floor(h);\n    const int w_low = floor(w);\n    const int h_high = h_low + 1;\n    const int w_high = w_low + 1;\n\n    const opmath_t lh = h - h_low;\n    const opmath_t lw = w - w_low;\n    const opmath_t hh = 1 - lh, hw = 1 - lw;\n\n    const int w_stride = nheads * group_channels;\n    const int h_stride = width * w_stride;\n    const int h_low_ptr_offset = h_low * h_stride;\n    const int h_high_ptr_offset = h_low_ptr_offset + h_stride;\n    const int w_low_ptr_offset = w_low * w_stride;\n    const int w_high_ptr_offset = w_low_ptr_offset + w_stride;\n    const int base_ptr = m * group_channels + c;\n\n    const opmath_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n    const opmath_t top_grad_im = top_grad * mask;\n    opmath_t grad_h_weight = 0, grad_w_weight = 0;\n\n    opmath_t v1 = 0;\n    if 
(h_low >= 0 && w_low >= 0) {\n        const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;\n        v1 = bottom_data[ptr1];\n        grad_h_weight -= hw * v1;\n        grad_w_weight -= hh * v1;\n        atomicAdd(grad_im + ptr1, w1 * top_grad_im);\n    }\n    opmath_t v2 = 0;\n    if (h_low >= 0 && w_high <= width - 1) {\n        const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;\n        v2 = bottom_data[ptr2];\n        grad_h_weight -= lw * v2;\n        grad_w_weight += hh * v2;\n        atomicAdd(grad_im + ptr2, w2 * top_grad_im);\n    }\n    opmath_t v3 = 0;\n    if (h_high <= height - 1 && w_low >= 0) {\n        const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;\n        v3 = bottom_data[ptr3];\n        grad_h_weight += hw * v3;\n        grad_w_weight -= lh * v3;\n        atomicAdd(grad_im + ptr3, w3 * top_grad_im);\n    }\n    opmath_t v4 = 0;\n    if (h_high <= height - 1 && w_high <= width - 1) {\n        const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;\n        v4 = bottom_data[ptr4];\n        grad_h_weight += lw * v4;\n        grad_w_weight += lh * v4;\n        atomicAdd(grad_im + ptr4, w4 * top_grad_im);\n    }\n\n    const opmath_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n    atomicAdd(grad_mask, top_grad * val);\n    atomicAdd(grad_offset, offset_scale * grad_w_weight * top_grad_im);\n    atomicAdd(grad_offset + 1, offset_scale * grad_h_weight * top_grad_im);\n}\n\ntemplate <typename scalar_t>\n__global__ void dcnv3_im2col_gpu_kernel(\n    const int num_kernels, const scalar_t *data_im, const scalar_t *data_offset,\n    const scalar_t *data_mask, scalar_t *data_col, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale) {\n    CUDA_KERNEL_LOOP(index, num_kernels) {\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const int input_size = height_in * width_in;\n        scalar_t *data_col_ptr = data_col + index;\n        const int kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const int qid_stride = group * group_channels;\n        opmath_t col = 0;\n        const scalar_t *data_im_ptr = data_im + b_col * input_size * qid_stride;\n        // top-left\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n  
              const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n                    p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    col += dcnv3_im2col_bilinear(\n                               data_im_ptr, height_in, width_in, group,\n                               group_channels, loc_h, loc_w, g_col, c_col) *\n                           weight;\n                }\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n            }\n        }\n        *data_col_ptr = col;\n    }\n}\n\n// debug\ntemplate <typename scalar_t, unsigned int blockSize>\n__global__ void dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(\n    const int num_kernels, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale, opmath_t *grad_im, opmath_t *grad_offset,\n    opmath_t *grad_mask) {\n    CUDA_KERNEL_LOOP(index, num_kernels) {\n        __shared__ opmath_t cache_grad_offset[blockSize * 2];\n        __shared__ opmath_t cache_grad_mask[blockSize];\n        unsigned int tid = threadIdx.x;\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const opmath_t top_grad = grad_col[index];\n        const int input_size = height_in * width_in;\n        const int kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const int grad_sampling_ptr = data_weight_ptr;\n        grad_offset += grad_sampling_ptr << 1;\n        grad_mask += grad_sampling_ptr;\n        const int qid_stride = group * group_channels;\n        const int im_ptr_offset = b_col * input_size * qid_stride;\n        const scalar_t *data_im_ptr = data_im + im_ptr_offset;\n        opmath_t *grad_im_ptr = grad_im + im_ptr_offset;\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n                const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n       
             p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                *(cache_grad_offset + (threadIdx.x << 1)) = 0;\n                *(cache_grad_offset + ((threadIdx.x << 1) + 1)) = 0;\n                *(cache_grad_mask + threadIdx.x) = 0;\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    dcnv3_col2im_bilinear(\n                        data_im_ptr, height_in, width_in, group, group_channels,\n                        loc_h, loc_w, g_col, c_col, offset_scale, top_grad,\n                        weight, grad_im_ptr,\n                        cache_grad_offset + (threadIdx.x << 1),\n                        cache_grad_mask + threadIdx.x);\n                }\n\n                __syncthreads();\n                if (tid == 0) {\n                    opmath_t _grad_w = cache_grad_offset[0],\n                             _grad_h = cache_grad_offset[1],\n                             _grad_a = cache_grad_mask[0];\n                    int sid = 2;\n                    for (unsigned int tid = 1; tid < blockSize; ++tid) {\n                        _grad_w += cache_grad_offset[sid];\n                        _grad_h += cache_grad_offset[sid + 1];\n                        _grad_a += cache_grad_mask[tid];\n                        sid += 2;\n                    }\n\n                    *grad_offset = _grad_w;\n                    *(grad_offset + 1) = _grad_h;\n                    *grad_mask = _grad_a;\n                }\n                __syncthreads();\n\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n                grad_mask += 1;\n                grad_offset += 2;\n            }\n        }\n    }\n}\n\ntemplate <typename scalar_t, unsigned int blockSize>\n__global__ void dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(\n    const int num_kernels, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale, opmath_t *grad_im, opmath_t *grad_offset,\n    opmath_t *grad_mask) {\n    CUDA_KERNEL_LOOP(index, num_kernels) {\n        __shared__ opmath_t cache_grad_offset[blockSize * 2];\n        __shared__ opmath_t cache_grad_mask[blockSize];\n        unsigned int tid = threadIdx.x;\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const opmath_t top_grad = grad_col[index];\n        const int input_size = height_in * width_in;\n        const int kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const 
int grad_sampling_ptr = data_weight_ptr;\n        grad_offset += grad_sampling_ptr << 1;\n        grad_mask += grad_sampling_ptr;\n        const int qid_stride = group * group_channels;\n        const int im_ptr_offset = b_col * input_size * qid_stride;\n        const scalar_t *data_im_ptr = data_im + im_ptr_offset;\n        opmath_t *grad_im_ptr = grad_im + im_ptr_offset;\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n                const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n                    p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                *(cache_grad_offset + (threadIdx.x << 1)) = 0;\n                *(cache_grad_offset + ((threadIdx.x << 1) + 1)) = 0;\n                *(cache_grad_mask + threadIdx.x) = 0;\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    dcnv3_col2im_bilinear(\n                        data_im_ptr, height_in, width_in, group, group_channels,\n                        loc_h, loc_w, g_col, c_col, offset_scale, top_grad,\n                        weight, grad_im_ptr,\n                        cache_grad_offset + (threadIdx.x << 1),\n                        cache_grad_mask + threadIdx.x);\n                }\n\n                __syncthreads();\n\n                for (unsigned int s = blockSize / 2; s > 0; s >>= 1) {\n                    if (tid < s) {\n                        const unsigned int xid1 = tid << 1;\n                        const unsigned int xid2 = (tid + s) << 1;\n                        cache_grad_mask[tid] += cache_grad_mask[tid + s];\n                        cache_grad_offset[xid1] += cache_grad_offset[xid2];\n                        cache_grad_offset[xid1 + 1] +=\n                            cache_grad_offset[xid2 + 1];\n                    }\n                    __syncthreads();\n                }\n\n                if (tid == 0) {\n                    *grad_offset = cache_grad_offset[0];\n                    *(grad_offset + 1) = cache_grad_offset[1];\n                    *grad_mask = cache_grad_mask[0];\n                }\n                __syncthreads();\n\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n                grad_mask += 1;\n                grad_offset += 2;\n            }\n        }\n    }\n}\n\ntemplate <typename scalar_t>\n__global__ void dcnv3_col2im_gpu_kernel_shm_reduce_v1(\n    const int num_kernels, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale, opmath_t *grad_im, opmath_t *grad_offset,\n    opmath_t *grad_mask) {\n    CUDA_KERNEL_LOOP(index, 
num_kernels) {\n        extern __shared__ int _s[];\n        opmath_t *cache_grad_offset = (opmath_t *)_s;\n        opmath_t *cache_grad_mask = cache_grad_offset + 2 * blockDim.x;\n        unsigned int tid = threadIdx.x;\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const opmath_t top_grad = grad_col[index];\n        const int input_size = height_in * width_in;\n        const int kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const int grad_sampling_ptr = data_weight_ptr;\n        grad_offset += grad_sampling_ptr << 1;\n        grad_mask += grad_sampling_ptr;\n        const int qid_stride = group * group_channels;\n        const int im_ptr_offset = b_col * input_size * qid_stride;\n        const scalar_t *data_im_ptr = data_im + im_ptr_offset;\n        opmath_t *grad_im_ptr = grad_im + im_ptr_offset;\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n                const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n                    p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                *(cache_grad_offset + (threadIdx.x << 1)) = 0;\n                *(cache_grad_offset + ((threadIdx.x << 1) + 1)) = 0;\n                *(cache_grad_mask + threadIdx.x) = 0;\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    dcnv3_col2im_bilinear(\n                        data_im_ptr, height_in, width_in, group, group_channels,\n                        loc_h, loc_w, g_col, c_col, offset_scale, top_grad,\n                        weight, grad_im_ptr,\n                        cache_grad_offset + (threadIdx.x << 1),\n                        cache_grad_mask + threadIdx.x);\n                }\n\n                __syncthreads();\n                if (tid == 0) {\n                    opmath_t _grad_w = cache_grad_offset[0],\n                             _grad_h = cache_grad_offset[1],\n                             _grad_a = cache_grad_mask[0];\n                    int sid = 2;\n                    for (unsigned int tid = 1; tid < blockDim.x; ++tid) {\n                        _grad_w += cache_grad_offset[sid];\n                        _grad_h += cache_grad_offset[sid + 1];\n                        _grad_a += cache_grad_mask[tid];\n                        sid += 2;\n                    }\n\n                    *grad_offset = 
_grad_w;\n                    *(grad_offset + 1) = _grad_h;\n                    *grad_mask = _grad_a;\n                }\n                __syncthreads();\n\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n                grad_mask += 1;\n                grad_offset += 2;\n            }\n        }\n    }\n}\n\ntemplate <typename scalar_t>\n__global__ void dcnv3_col2im_gpu_kernel_shm_reduce_v2(\n    const int num_kernels, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale, opmath_t *grad_im, opmath_t *grad_offset,\n    opmath_t *grad_mask) {\n    CUDA_KERNEL_LOOP(index, num_kernels) {\n        extern __shared__ int _s[];\n        opmath_t *cache_grad_offset = (opmath_t *)_s;\n        opmath_t *cache_grad_mask = cache_grad_offset + 2 * blockDim.x;\n        unsigned int tid = threadIdx.x;\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const opmath_t top_grad = grad_col[index];\n        const int input_size = height_in * width_in;\n        const int kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const int grad_sampling_ptr = data_weight_ptr;\n        grad_offset += grad_sampling_ptr << 1;\n        grad_mask += grad_sampling_ptr;\n        const int qid_stride = group * group_channels;\n        const int im_ptr_offset = b_col * input_size * qid_stride;\n        const scalar_t *data_im_ptr = data_im + im_ptr_offset;\n        opmath_t *grad_im_ptr = grad_im + im_ptr_offset;\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n                const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n                    p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                *(cache_grad_offset + (threadIdx.x << 1)) = 0;\n                *(cache_grad_offset + ((threadIdx.x << 1) + 1)) = 0;\n                *(cache_grad_mask + threadIdx.x) = 0;\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    
dcnv3_col2im_bilinear(\n                        data_im_ptr, height_in, width_in, group, group_channels,\n                        loc_h, loc_w, g_col, c_col, offset_scale, top_grad,\n                        weight, grad_im_ptr,\n                        cache_grad_offset + (threadIdx.x << 1),\n                        cache_grad_mask + threadIdx.x);\n                }\n\n                __syncthreads();\n\n                for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0;\n                     s >>= 1, spre >>= 1) {\n                    if (tid < s) {\n                        const unsigned int xid1 = tid << 1;\n                        const unsigned int xid2 = (tid + s) << 1;\n                        cache_grad_mask[tid] += cache_grad_mask[tid + s];\n                        cache_grad_offset[xid1] += cache_grad_offset[xid2];\n                        cache_grad_offset[xid1 + 1] +=\n                            cache_grad_offset[xid2 + 1];\n                        if (tid + (s << 1) < spre) {\n                            cache_grad_mask[tid] +=\n                                cache_grad_mask[tid + (s << 1)];\n                            cache_grad_offset[xid1] +=\n                                cache_grad_offset[xid2 + (s << 1)];\n                            cache_grad_offset[xid1 + 1] +=\n                                cache_grad_offset[xid2 + 1 + (s << 1)];\n                        }\n                    }\n                    __syncthreads();\n                }\n\n                if (tid == 0) {\n                    *grad_offset = cache_grad_offset[0];\n                    *(grad_offset + 1) = cache_grad_offset[1];\n                    *grad_mask = cache_grad_mask[0];\n                }\n                __syncthreads();\n\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n                grad_mask += 1;\n                grad_offset += 2;\n            }\n        }\n    }\n}\n\ntemplate <typename scalar_t>\n__global__ void dcnv3_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(\n    const int num_kernels, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale, opmath_t *grad_im, opmath_t *grad_offset,\n    opmath_t *grad_mask) {\n    CUDA_KERNEL_LOOP(index, num_kernels) {\n        extern __shared__ int _s[];\n        opmath_t *cache_grad_offset = (opmath_t *)_s;\n        opmath_t *cache_grad_mask = cache_grad_offset + 2 * blockDim.x;\n        unsigned int tid = threadIdx.x;\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const opmath_t top_grad = grad_col[index];\n        const int input_size = height_in * width_in;\n        const int 
kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const int grad_sampling_ptr = data_weight_ptr;\n        grad_offset += grad_sampling_ptr << 1;\n        grad_mask += grad_sampling_ptr;\n        const int qid_stride = group * group_channels;\n        const int im_ptr_offset = b_col * input_size * qid_stride;\n        const scalar_t *data_im_ptr = data_im + im_ptr_offset;\n        opmath_t *grad_im_ptr = grad_im + im_ptr_offset;\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n                const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n                    p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                *(cache_grad_offset + (threadIdx.x << 1)) = 0;\n                *(cache_grad_offset + ((threadIdx.x << 1) + 1)) = 0;\n                *(cache_grad_mask + threadIdx.x) = 0;\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    dcnv3_col2im_bilinear(\n                        data_im_ptr, height_in, width_in, group, group_channels,\n                        loc_h, loc_w, g_col, c_col, offset_scale, top_grad,\n                        weight, grad_im_ptr,\n                        cache_grad_offset + (threadIdx.x << 1),\n                        cache_grad_mask + threadIdx.x);\n                }\n\n                __syncthreads();\n\n                for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0;\n                     s >>= 1, spre >>= 1) {\n                    if (tid < s) {\n                        const unsigned int xid1 = tid << 1;\n                        const unsigned int xid2 = (tid + s) << 1;\n                        cache_grad_mask[tid] += cache_grad_mask[tid + s];\n                        cache_grad_offset[xid1] += cache_grad_offset[xid2];\n                        cache_grad_offset[xid1 + 1] +=\n                            cache_grad_offset[xid2 + 1];\n                        if (tid + (s << 1) < spre) {\n                            cache_grad_mask[tid] +=\n                                cache_grad_mask[tid + (s << 1)];\n                            cache_grad_offset[xid1] +=\n                                cache_grad_offset[xid2 + (s << 1)];\n                            cache_grad_offset[xid1 + 1] +=\n                                cache_grad_offset[xid2 + 1 + (s << 1)];\n                        }\n                    }\n                    __syncthreads();\n                }\n\n                if (tid == 0) {\n                    atomicAdd(grad_offset, cache_grad_offset[0]);\n                    atomicAdd(grad_offset + 1, cache_grad_offset[1]);\n                    atomicAdd(grad_mask, cache_grad_mask[0]);\n                }\n                __syncthreads();\n\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n                grad_mask += 1;\n                grad_offset += 2;\n        
    }\n        }\n    }\n}\n\ntemplate <typename scalar_t>\n__global__ void dcnv3_col2im_gpu_kernel_gm(\n    const int num_kernels, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int height_in,\n    const int width_in, const int height_out, const int width_out,\n    const opmath_t offset_scale, opmath_t *grad_im, opmath_t *grad_offset,\n    opmath_t *grad_mask) {\n    CUDA_KERNEL_LOOP(index, num_kernels) {\n        int _temp = index;\n        const int c_col = _temp % group_channels;\n        _temp /= group_channels;\n        const int sampling_index = _temp;\n        const int g_col = _temp % group;\n        _temp /= group;\n        const int p0_w = ((dilation_w * (kernel_w - 1)) >> 1) - pad_w +\n                         (_temp % width_out) * stride_w;\n        _temp /= width_out;\n        const int p0_h = ((dilation_h * (kernel_h - 1)) >> 1) - pad_h +\n                         (_temp % height_out) * stride_h;\n        _temp /= height_out;\n        const int b_col = _temp;\n\n        const opmath_t top_grad = grad_col[index];\n        const int input_size = height_in * width_in;\n        const int kernel_size = kernel_h * kernel_w;\n        int data_weight_ptr = sampling_index * kernel_size;\n        int data_loc_w_ptr = data_weight_ptr << 1;\n        const int grad_sampling_ptr = data_weight_ptr;\n        grad_offset += grad_sampling_ptr << 1;\n        grad_mask += grad_sampling_ptr;\n        const int qid_stride = group * group_channels;\n        const int im_ptr_offset = b_col * input_size * qid_stride;\n        const scalar_t *data_im_ptr = data_im + im_ptr_offset;\n        opmath_t *grad_im_ptr = grad_im + im_ptr_offset;\n        const opmath_t p0_w_ =\n            p0_w - ((dilation_w * (kernel_w - 1)) >> 1) * offset_scale;\n        const opmath_t p0_h_ =\n            p0_h - ((dilation_h * (kernel_h - 1)) >> 1) * offset_scale;\n        for (int i = 0; i < kernel_w; ++i) {\n            for (int j = 0; j < kernel_h; ++j) {\n                const opmath_t offset_w = data_offset[data_loc_w_ptr];\n                const opmath_t offset_h = data_offset[data_loc_w_ptr + 1];\n                const opmath_t loc_w =\n                    p0_w_ + (i * dilation_w + offset_w) * offset_scale;\n                const opmath_t loc_h =\n                    p0_h_ + (j * dilation_h + offset_h) * offset_scale;\n                const opmath_t weight = data_mask[data_weight_ptr];\n                if (loc_h > -1 && loc_w > -1 && loc_h < height_in &&\n                    loc_w < width_in) {\n                    dcnv3_col2im_bilinear_gm(\n                        data_im_ptr, height_in, width_in, group, group_channels,\n                        loc_h, loc_w, g_col, c_col, offset_scale, top_grad,\n                        weight, grad_im_ptr, grad_offset, grad_mask);\n                }\n                data_weight_ptr += 1;\n                data_loc_w_ptr += 2;\n                grad_mask += 1;\n                grad_offset += 2;\n            }\n        }\n    }\n}\n\ntemplate <typename scalar_t>\nvoid dcnv3_im2col_cuda(cudaStream_t stream, const scalar_t *data_im,\n                       const scalar_t *data_offset, const scalar_t *data_mask,\n                       scalar_t *data_col, const int kernel_h,\n                       const 
int kernel_w, const int stride_h,\n                       const int stride_w, const int pad_h, const int pad_w,\n                       const int dilation_h, const int dilation_w,\n                       const int group, const int group_channels,\n                       const int batch_n, const int height_in,\n                       const int width_in, const int height_out,\n                       const int width_out, const opmath_t offset_scale) {\n    const int num_kernels =\n        batch_n * height_out * width_out * group * group_channels;\n    const int num_actual_kernels =\n        batch_n * height_out * width_out * group * group_channels;\n    const int num_threads = CUDA_NUM_THREADS;\n    dcnv3_im2col_gpu_kernel<scalar_t>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n           stream>>>(num_kernels, data_im, data_offset, data_mask, data_col,\n                     kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,\n                     dilation_h, dilation_w, group, group_channels, height_in,\n                     width_in, height_out, width_out, offset_scale);\n\n    cudaError_t err = cudaGetLastError();\n    if (err != cudaSuccess) {\n        printf(\"error in dcnv3_im2col_cuda: %s\\n\", cudaGetErrorString(err));\n    }\n}\n\ntemplate <typename scalar_t>\nvoid dcnv3_col2im_cuda(\n    cudaStream_t stream, const scalar_t *grad_col, const scalar_t *data_im,\n    const scalar_t *data_offset, const scalar_t *data_mask, const int kernel_h,\n    const int kernel_w, const int stride_h, const int stride_w, const int pad_h,\n    const int pad_w, const int dilation_h, const int dilation_w,\n    const int group, const int group_channels, const int batch_n,\n    const int height_in, const int width_in, const int height_out,\n    const int width_out, const opmath_t offset_scale, opmath_t *grad_im,\n    opmath_t *grad_offset, opmath_t *grad_mask) {\n    const int num_threads =\n        (group_channels > CUDA_NUM_THREADS) ? 
CUDA_NUM_THREADS : group_channels;\n    const int num_kernels =\n        batch_n * height_out * width_out * group * group_channels;\n    const int num_actual_kernels =\n        batch_n * height_out * width_out * group * group_channels;\n    if (group_channels > 1024) {\n        if ((group_channels & 1023) == 0) {\n            dcnv3_col2im_gpu_kernel_shm_reduce_v2_multi_blocks<scalar_t>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n                   num_threads * 3 * sizeof(opmath_t), stream>>>(\n                    num_kernels, grad_col, data_im, data_offset, data_mask,\n                    kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,\n                    dilation_h, dilation_w, group, group_channels, height_in,\n                    width_in, height_out, width_out, offset_scale, grad_im,\n                    grad_offset, grad_mask);\n        } else {\n            dcnv3_col2im_gpu_kernel_gm<scalar_t>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n        }\n    } else {\n        switch (group_channels) {\n        case 1:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 1>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 2:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 2>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 4:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 4>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 8:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 8>\n                <<<GET_BLOCKS(num_actual_kernels, 
num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 16:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 16>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 32:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 32>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 64:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 64>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 128:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 128>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 256:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 256>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, 
grad_offset,\n                             grad_mask);\n            break;\n        case 512:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 512>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        case 1024:\n            dcnv3_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t,\n                                                                  1024>\n                <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads, 0,\n                   stream>>>(num_kernels, grad_col, data_im, data_offset,\n                             data_mask, kernel_h, kernel_w, stride_h, stride_w,\n                             pad_h, pad_w, dilation_h, dilation_w, group,\n                             group_channels, height_in, width_in, height_out,\n                             width_out, offset_scale, grad_im, grad_offset,\n                             grad_mask);\n            break;\n        default:\n            if (group_channels < 64) {\n                dcnv3_col2im_gpu_kernel_shm_reduce_v1<scalar_t>\n                    <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n                       num_threads * 3 * sizeof(opmath_t), stream>>>(\n                        num_kernels, grad_col, data_im, data_offset, data_mask,\n                        kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,\n                        dilation_h, dilation_w, group, group_channels,\n                        height_in, width_in, height_out, width_out,\n                        offset_scale, grad_im, grad_offset, grad_mask);\n            } else {\n                dcnv3_col2im_gpu_kernel_shm_reduce_v2<scalar_t>\n                    <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n                       num_threads * 3 * sizeof(opmath_t), stream>>>(\n                        num_kernels, grad_col, data_im, data_offset, data_mask,\n                        kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w,\n                        dilation_h, dilation_w, group, group_channels,\n                        height_in, width_in, height_out, width_out,\n                        offset_scale, grad_im, grad_offset, grad_mask);\n            }\n        }\n    }\n    cudaError_t err = cudaGetLastError();\n    if (err != cudaSuccess) {\n        printf(\"error in dcnv3_col2im_cuda: %s\\n\", cudaGetErrorString(err));\n    }\n}"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/dcnv3.h",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#pragma once\n\n#include \"cpu/dcnv3_cpu.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/dcnv3_cuda.h\"\n#endif\n\nat::Tensor dcnv3_forward(const at::Tensor &input, const at::Tensor &offset,\n                         const at::Tensor &mask, const int kernel_h,\n                         const int kernel_w, const int stride_h,\n                         const int stride_w, const int pad_h, const int pad_w,\n                         const int dilation_h, const int dilation_w,\n                         const int group, const int group_channels,\n                         const float offset_scale, const int im2col_step) {\n    if (input.type().is_cuda()) {\n#ifdef WITH_CUDA\n        return dcnv3_cuda_forward(input, offset, mask, kernel_h, kernel_w,\n                                  stride_h, stride_w, pad_h, pad_w, dilation_h,\n                                  dilation_w, group, group_channels,\n                                  offset_scale, im2col_step);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    AT_ERROR(\"Not implemented on the CPU\");\n}\n\nstd::vector<at::Tensor>\ndcnv3_backward(const at::Tensor &input, const at::Tensor &offset,\n               const at::Tensor &mask, const int kernel_h, const int kernel_w,\n               const int stride_h, const int stride_w, const int pad_h,\n               const int pad_w, const int dilation_h, const int dilation_w,\n               const int group, const int group_channels,\n               const float offset_scale, const at::Tensor &grad_output,\n               const int im2col_step) {\n    if (input.type().is_cuda()) {\n#ifdef WITH_CUDA\n        return dcnv3_cuda_backward(input, offset, mask, kernel_h, kernel_w,\n                                   stride_h, stride_w, pad_h, pad_w, dilation_h,\n                                   dilation_w, group, group_channels,\n                                   offset_scale, grad_output, im2col_step);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    AT_ERROR(\"Not implemented on the CPU\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/src/vision.cpp",
    "content": "/*!\n**************************************************************************************************\n* InternImage\n* Copyright (c) 2022 OpenGVLab\n* Licensed under The MIT License [see LICENSE for details]\n**************************************************************************************************\n* Modified from\n*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include \"dcnv3.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n    m.def(\"dcnv3_forward\", &dcnv3_forward, \"dcnv3_forward\");\n    m.def(\"dcnv3_backward\", &dcnv3_backward, \"dcnv3_backward\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/ops_dcnv3/test.py",
    "content": "# --------------------------------------------------------\n# InternImage\n# Copyright (c) 2022 OpenGVLab\n# Licensed under The MIT License [see LICENSE for details]\n# --------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport time\nimport torch\nimport torch.nn as nn\nimport math\nfrom torch.autograd import gradcheck\n\nfrom functions.dcnv3_func import DCNv3Function, dcnv3_core_pytorch\n\nH_in, W_in = 8, 8\nN, M, D = 2, 4, 16\nKh, Kw = 3, 3\nP = Kh * Kw\noffset_scale = 2.0\npad = 1\ndilation = 1\nstride = 1\nH_out = (H_in + 2 * pad - (dilation * (Kh - 1) + 1)) // stride + 1\nW_out = (W_in + 2 * pad - (dilation * (Kw - 1) + 1)) // stride + 1\n\ntorch.manual_seed(3)\n\n\n@torch.no_grad()\ndef check_forward_equal_with_pytorch_double():\n    input = torch.rand(N, H_in, W_in, M*D).cuda() * 0.01\n    offset = torch.rand(N, H_out, W_out, M*P*2).cuda() * 10\n    mask = torch.rand(N, H_out, W_out, M, P).cuda() + 1e-5\n    mask /= mask.sum(-1, keepdim=True)\n    mask = mask.reshape(N, H_out, W_out, M*P)\n\n    output_pytorch = dcnv3_core_pytorch(\n        input.double(),\n        offset.double(),\n        mask.double(),\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale).detach().cpu()\n\n    im2col_step = 2\n    output_cuda = DCNv3Function.apply(\n        input.double(),\n        offset.double(),\n        mask.double(),\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale,\n        im2col_step).detach().cpu()\n\n    fwdok = torch.allclose(output_cuda, output_pytorch)\n    max_abs_err = (output_cuda - output_pytorch).abs().max()\n    max_rel_err = ((output_cuda - output_pytorch).abs() /\n                   output_pytorch.abs()).max()\n    print('>>> forward double')\n    print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n\n@torch.no_grad()\ndef check_forward_equal_with_pytorch_float():\n    input = torch.rand(N, H_in, W_in, M*D).cuda() * 0.01\n    offset = torch.rand(N, H_out, W_out, M*P*2).cuda() * 10\n    mask = torch.rand(N, H_out, W_out, M, P).cuda() + 1e-5\n    mask /= mask.sum(-1, keepdim=True)\n    mask = mask.reshape(N, H_out, W_out, M*P)\n\n    output_pytorch = dcnv3_core_pytorch(\n        input,\n        offset,\n        mask,\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale).detach().cpu()\n\n    im2col_step = 2\n    output_cuda = DCNv3Function.apply(\n        input,\n        offset,\n        mask,\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale,\n        im2col_step).detach().cpu()\n\n    fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)\n    max_abs_err = (output_cuda - output_pytorch).abs().max()\n    max_rel_err = ((output_cuda - output_pytorch).abs() /\n                   output_pytorch.abs()).max()\n    print('>>> forward float')\n    print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n\ndef check_backward_equal_with_pytorch_double(channels=4, grad_input=True, grad_offset=True, grad_mask=True):\n    # H_in, W_in = 4, 4\n    N = 2\n    M = 2\n    H_out = (H_in + 2 * pad - (dilation * (Kh - 1) + 1)) // stride + 1\n    W_out = (W_in + 2 * pad - (dilation * (Kw - 1) + 1)) // stride + 1\n\n    D = channels\n    input0 = 
torch.rand(N, H_in, W_in, M*D).cuda() * 0.01\n    offset0 = torch.rand(N, H_out, W_out, M*P*2).cuda() * 10\n    mask0 = torch.rand(N, H_out, W_out, M, P).cuda() + 1e-5\n    mask0 /= mask0.sum(-1, keepdim=True)\n    mask0 = mask0.reshape(N, H_out, W_out, M*P)\n    input0.requires_grad = grad_input\n    offset0.requires_grad = grad_offset\n    mask0.requires_grad = grad_mask\n\n    output_pytorch = dcnv3_core_pytorch(\n        input0.double(),\n        offset0.double(),\n        mask0.double(),\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale)\n    output_pytorch.sum().backward()\n\n    input1 = input0.detach()\n    offset1 = offset0.detach()\n    mask1 = mask0.detach()\n    input1.requires_grad = grad_input\n    offset1.requires_grad = grad_offset\n    mask1.requires_grad = grad_mask\n\n    im2col_step = 2\n    output_cuda = DCNv3Function.apply(\n        input1.double(),\n        offset1.double(),\n        mask1.double(),\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale,\n        im2col_step)\n    output_cuda.sum().backward()\n\n    print(f'>>> backward double: channels {D}')\n    bwdok = torch.allclose(input0.grad, input1.grad, rtol=1e-2, atol=1e-3)\n    max_abs_err = (input0.grad - input1.grad).abs().max()\n    max_rel_err = ((input0.grad - input1.grad).abs() /\n                   input0.grad.abs()).max()\n    print(\n        f'* {bwdok} input_grad check_backward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n    bwdok = torch.allclose(offset0.grad, offset1.grad, rtol=1e-2, atol=1e-3)\n    max_abs_err = (offset0.grad - offset1.grad).abs().max()\n    max_rel_err = ((offset0.grad - offset1.grad).abs() /\n                   offset0.grad.abs()).max()\n    print(\n        f'* {bwdok} offset_grad check_backward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n    bwdok = torch.allclose(mask0.grad, mask1.grad, rtol=1e-2, atol=1e-3)\n    max_abs_err = (mask0.grad - mask1.grad).abs().max()\n    max_rel_err = ((mask0.grad - mask1.grad).abs() /\n                   mask0.grad.abs()).max()\n    print(\n        f'* {bwdok} mask_grad check_backward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n\ndef check_backward_equal_with_pytorch_float(channels=4, grad_input=True, grad_offset=True, grad_mask=True):\n    # H_in, W_in = 4, 4\n    N = 2\n    M = 2\n    H_out = (H_in + 2 * pad - (dilation * (Kh - 1) + 1)) // stride + 1\n    W_out = (W_in + 2 * pad - (dilation * (Kw - 1) + 1)) // stride + 1\n\n    D = channels\n    input0 = torch.rand(N, H_in, W_in, M*D).cuda() * 0.01\n    offset0 = torch.rand(N, H_out, W_out, M*P*2).cuda() * 10\n    mask0 = torch.rand(N, H_out, W_out, M, P).cuda() + 1e-5\n    mask0 /= mask0.sum(-1, keepdim=True)\n    mask0 = mask0.reshape(N, H_out, W_out, M*P)\n    input0.requires_grad = grad_input\n    offset0.requires_grad = grad_offset\n    mask0.requires_grad = grad_mask\n\n    output_pytorch = dcnv3_core_pytorch(\n        input0,\n        offset0,\n        mask0,\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale)\n    output_pytorch.sum().backward()\n\n    input1 = input0.detach()\n    offset1 = offset0.detach()\n    mask1 = mask0.detach()\n    input1.requires_grad = grad_input\n    offset1.requires_grad = grad_offset\n    mask1.requires_grad = grad_mask\n\n    im2col_step = 2\n    output_cuda = DCNv3Function.apply(\n        
input1,\n        offset1,\n        mask1,\n        Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, offset_scale,\n        im2col_step)\n    output_cuda.sum().backward()\n\n    print(f'>>> backward float: channels {D}')\n    bwdok = torch.allclose(input0.grad, input1.grad, rtol=1e-2, atol=1e-3)\n    max_abs_err = (input0.grad - input1.grad).abs().max()\n    max_rel_err = ((input0.grad - input1.grad).abs() /\n                   input0.grad.abs()).max()\n    print(\n        f'* {bwdok} input_grad check_backward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n    bwdok = torch.allclose(offset0.grad, offset1.grad, rtol=1e-2, atol=1e-3)\n    max_abs_err = (offset0.grad - offset1.grad).abs().max()\n    max_rel_err = ((offset0.grad - offset1.grad).abs() /\n                   offset0.grad.abs()).max()\n    print(\n        f'* {bwdok} offset_grad check_backward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n    bwdok = torch.allclose(mask0.grad, mask1.grad, rtol=1e-2, atol=1e-3)\n    max_abs_err = (mask0.grad - mask1.grad).abs().max()\n    max_rel_err = ((mask0.grad - mask1.grad).abs() /\n                   mask0.grad.abs()).max()\n    print(\n        f'* {bwdok} mask_grad check_backward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n\n@torch.no_grad()\ndef check_time_cost(im2col_step=128):\n    N = 512\n    H_in, W_in = 64, 64\n    H_out = (H_in + 2 * pad - (dilation * (Kh - 1) + 1)) // stride + 1\n    W_out = (W_in + 2 * pad - (dilation * (Kw - 1) + 1)) // stride + 1\n\n    input = torch.rand(N, H_in, W_in, M*D).cuda() * 0.01\n    offset = torch.rand(N, H_out, W_out, M*P*2).cuda() * 10\n    mask = torch.rand(N, H_out, W_out, M, P).cuda() + 1e-5\n    mask /= mask.sum(-1, keepdim=True)\n    mask = mask.reshape(N, H_out, W_out, M*P)\n    print(\n        f'>>> time cost: im2col_step {im2col_step}; input {input.shape}; points {P} ')\n    repeat = 100\n    # warm up before timing\n    for i in range(repeat):\n        output_cuda = DCNv3Function.apply(\n            input,\n            offset,\n            mask,\n            Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, 1.0,\n            im2col_step)\n    torch.cuda.synchronize()\n    start = time.time()\n    for i in range(repeat):\n        output_cuda = DCNv3Function.apply(\n            input,\n            offset,\n            mask,\n            Kh, Kw, stride, stride, Kh // 2, Kw // 2, dilation, dilation, M, D, 1.0,\n            im2col_step)\n    torch.cuda.synchronize()\n    print(f'forward time cost: {(time.time() - start) / repeat}')\n\n\nif __name__ == '__main__':\n    check_forward_equal_with_pytorch_double()\n    check_forward_equal_with_pytorch_float()\n    for channels in [1, 16, 30, 32, 64, 71, 1025]:\n        check_backward_equal_with_pytorch_double(channels, True, True, True)\n    for channels in [1, 16, 30, 32, 64, 71, 1025]:\n        check_backward_equal_with_pytorch_float(channels, True, True, True)\n    for i in range(3):\n        im2col_step = 128 * (2 ** i)\n        check_time_cost(im2col_step)\n"
  },
  {
    "path": "mmdet3d/ops/paconv/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .paconv import PAConv, PAConvCUDA\n\n__all__ = ['PAConv', 'PAConvCUDA']\n"
  },
  {
    "path": "mmdet3d/ops/paconv/paconv.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch\nfrom mmcv.cnn import (ConvModule, build_activation_layer, build_norm_layer,\n                      constant_init)\nfrom mmcv.ops import assign_score_withk as assign_score_cuda\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom .utils import assign_kernel_withoutk, assign_score, calc_euclidian_dist\n\n\nclass ScoreNet(nn.Module):\n    r\"\"\"ScoreNet that outputs coefficient scores to assemble kernel weights in\n    the weight bank according to the relative position of point pairs.\n\n    Args:\n        mlp_channels (List[int]): Hidden unit sizes of SharedMLP layers.\n        last_bn (bool, optional): Whether to use BN on the last output of mlps.\n            Defaults to False.\n        score_norm (str, optional): Normalization function of output scores.\n            Can be 'softmax', 'sigmoid' or 'identity'. Defaults to 'softmax'.\n        temp_factor (float, optional): Temperature factor to scale the output\n            scores before softmax. Defaults to 1.0.\n        norm_cfg (dict, optional): Type of normalization method.\n            Defaults to dict(type='BN2d').\n        bias (bool | str, optional): If specified as `auto`, it will be decided\n            by the norm_cfg. Bias will be set as True if `norm_cfg` is None,\n            otherwise False. Defaults to 'auto'.\n\n    Note:\n        The official code applies xavier_init to all Conv layers in ScoreNet,\n            see `PAConv <https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg\n            /model/pointnet2/paconv.py#L105>`_. However in our experiments, we\n            did not find much difference in applying such xavier initialization\n            or not. So we neglect this initialization in our implementation.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 last_bn=False,\n                 score_norm='softmax',\n                 temp_factor=1.0,\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto'):\n        super(ScoreNet, self).__init__()\n\n        assert score_norm in ['softmax', 'sigmoid', 'identity'], \\\n            f'unsupported score_norm function {score_norm}'\n\n        self.score_norm = score_norm\n        self.temp_factor = temp_factor\n\n        self.mlps = nn.Sequential()\n        for i in range(len(mlp_channels) - 2):\n            self.mlps.add_module(\n                f'layer{i}',\n                ConvModule(\n                    mlp_channels[i],\n                    mlp_channels[i + 1],\n                    kernel_size=(1, 1),\n                    stride=(1, 1),\n                    conv_cfg=dict(type='Conv2d'),\n                    norm_cfg=norm_cfg,\n                    bias=bias))\n\n        # for the last mlp that outputs scores, no relu and possibly no bn\n        i = len(mlp_channels) - 2\n        self.mlps.add_module(\n            f'layer{i}',\n            ConvModule(\n                mlp_channels[i],\n                mlp_channels[i + 1],\n                kernel_size=(1, 1),\n                stride=(1, 1),\n                conv_cfg=dict(type='Conv2d'),\n                norm_cfg=norm_cfg if last_bn else None,\n                act_cfg=None,\n                bias=bias))\n\n    def forward(self, xyz_features):\n        \"\"\"Forward.\n\n        Args:\n            xyz_features (torch.Tensor): (B, C, N, K), features constructed\n                from xyz coordinates of point pairs. 
May contain relative\n                positions, Euclidean distance, etc.\n\n        Returns:\n            torch.Tensor: (B, N, K, M), predicted scores for `M` kernels.\n        \"\"\"\n        scores = self.mlps(xyz_features)  # (B, M, N, K)\n\n        # perform score normalization\n        if self.score_norm == 'softmax':\n            scores = F.softmax(scores / self.temp_factor, dim=1)\n        elif self.score_norm == 'sigmoid':\n            scores = torch.sigmoid(scores / self.temp_factor)\n        else:  # 'identity'\n            scores = scores\n\n        scores = scores.permute(0, 2, 3, 1)  # (B, N, K, M)\n\n        return scores\n\n\nclass PAConv(nn.Module):\n    \"\"\"Non-CUDA version of PAConv.\n\n    PAConv stores a trainable weight bank containing several kernel weights.\n    Given input points and features, it computes coefficient scores to assemble\n    those kernels to form conv kernels, and then runs convolution on the input.\n\n    Args:\n        in_channels (int): Input channels of point features.\n        out_channels (int): Output channels of point features.\n        num_kernels (int): Number of kernel weights in the weight bank.\n        norm_cfg (dict, optional): Type of normalization method.\n            Defaults to dict(type='BN2d', momentum=0.1).\n        act_cfg (dict, optional): Type of activation method.\n            Defaults to dict(type='ReLU', inplace=True).\n        scorenet_input (str, optional): Type of input to ScoreNet.\n            Can be 'identity', 'w_neighbor' or 'w_neighbor_dist'.\n            Defaults to 'w_neighbor_dist'.\n        weight_bank_init (str, optional): Init method of weight bank kernels.\n            Can be 'kaiming' or 'xavier'. Defaults to 'kaiming'.\n        kernel_input (str, optional): Input features to be multiplied with\n            kernel weights. 
Can be 'identity' or 'w_neighbor'.\n            Defaults to 'w_neighbor'.\n        scorenet_cfg (dict, optional): Config of the ScoreNet module, which\n            may contain the following keys and values:\n\n            - mlp_channels (List[int]): Hidden units of MLPs.\n            - score_norm (str): Normalization function of output scores.\n                Can be 'softmax', 'sigmoid' or 'identity'.\n            - temp_factor (float): Temperature factor to scale the output\n                scores before softmax.\n            - last_bn (bool): Whether to use BN on the last output of mlps.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_kernels,\n                 norm_cfg=dict(type='BN2d', momentum=0.1),\n                 act_cfg=dict(type='ReLU', inplace=True),\n                 scorenet_input='w_neighbor_dist',\n                 weight_bank_init='kaiming',\n                 kernel_input='w_neighbor',\n                 scorenet_cfg=dict(\n                     mlp_channels=[16, 16, 16],\n                     score_norm='softmax',\n                     temp_factor=1.0,\n                     last_bn=False)):\n        super(PAConv, self).__init__()\n\n        # determine weight kernel size according to used features\n        if kernel_input == 'identity':\n            # only use grouped_features\n            kernel_mul = 1\n        elif kernel_input == 'w_neighbor':\n            # concat of (grouped_features - center_features, grouped_features)\n            kernel_mul = 2\n        else:\n            raise NotImplementedError(\n                f'unsupported kernel_input {kernel_input}')\n        self.kernel_input = kernel_input\n        in_channels = kernel_mul * in_channels\n\n        # determine mlp channels in ScoreNet according to used xyz features\n        if scorenet_input == 'identity':\n            # only use relative position (grouped_xyz - center_xyz)\n            self.scorenet_in_channels = 3\n        elif scorenet_input == 'w_neighbor':\n            # (grouped_xyz - center_xyz, grouped_xyz)\n            self.scorenet_in_channels = 6\n        elif scorenet_input == 'w_neighbor_dist':\n            # (center_xyz, grouped_xyz - center_xyz, Euclidean distance)\n            self.scorenet_in_channels = 7\n        else:\n            raise NotImplementedError(\n                f'unsupported scorenet_input {scorenet_input}')\n        self.scorenet_input = scorenet_input\n\n        # construct kernel weights in weight bank\n        # self.weight_bank is of shape [C, num_kernels * out_c]\n        # where C can be in_c or (2 * in_c)\n        if weight_bank_init == 'kaiming':\n            weight_init = nn.init.kaiming_normal_\n        elif weight_bank_init == 'xavier':\n            weight_init = nn.init.xavier_normal_\n        else:\n            raise NotImplementedError(\n                f'unsupported weight bank init method {weight_bank_init}')\n\n        self.num_kernels = num_kernels  # the parameter `m` in the paper\n        weight_bank = weight_init(\n            torch.empty(self.num_kernels, in_channels, out_channels))\n        weight_bank = weight_bank.permute(1, 0, 2).reshape(\n            in_channels, self.num_kernels * out_channels).contiguous()\n        self.weight_bank = nn.Parameter(weight_bank, requires_grad=True)\n\n        # construct ScoreNet\n        scorenet_cfg_ = copy.deepcopy(scorenet_cfg)\n        scorenet_cfg_['mlp_channels'].insert(0, self.scorenet_in_channels)\n        
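# the ScoreNet MLP goes from scorenet_in_channels (xyz pair features) to\n        # num_kernels coefficient scores per point pair\n        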
scorenet_cfg_['mlp_channels'].append(self.num_kernels)\n        self.scorenet = ScoreNet(**scorenet_cfg_)\n\n        self.bn = build_norm_layer(norm_cfg, out_channels)[1] if \\\n            norm_cfg is not None else None\n        self.activate = build_activation_layer(act_cfg) if \\\n            act_cfg is not None else None\n\n        # set some basic attributes of Conv layers\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Initialize weights of shared MLP layers and BN layers.\"\"\"\n        if self.bn is not None:\n            constant_init(self.bn, val=1, bias=0)\n\n    def _prepare_scorenet_input(self, points_xyz):\n        \"\"\"Prepare input point pairs features for self.ScoreNet.\n\n        Args:\n            points_xyz (torch.Tensor): (B, 3, npoint, K)\n                Coordinates of the grouped points.\n\n        Returns:\n            torch.Tensor: (B, C, npoint, K)\n                The generated features per point pair.\n        \"\"\"\n        B, _, npoint, K = points_xyz.size()\n        center_xyz = points_xyz[..., :1].repeat(1, 1, 1, K)\n        xyz_diff = points_xyz - center_xyz  # [B, 3, npoint, K]\n        if self.scorenet_input == 'identity':\n            xyz_features = xyz_diff\n        elif self.scorenet_input == 'w_neighbor':\n            xyz_features = torch.cat((xyz_diff, points_xyz), dim=1)\n        else:  # w_neighbor_dist\n            euclidian_dist = calc_euclidian_dist(\n                center_xyz.permute(0, 2, 3, 1).reshape(B * npoint * K, 3),\n                points_xyz.permute(0, 2, 3, 1).reshape(B * npoint * K, 3)).\\\n                    reshape(B, 1, npoint, K)\n            xyz_features = torch.cat((center_xyz, xyz_diff, euclidian_dist),\n                                     dim=1)\n        return xyz_features\n\n    def forward(self, inputs):\n        \"\"\"Forward.\n\n        Args:\n            inputs (tuple(torch.Tensor)):\n\n                - features (torch.Tensor): (B, in_c, npoint, K)\n                    Features of the queried points.\n                - points_xyz (torch.Tensor): (B, 3, npoint, K)\n                    Coordinates of the grouped points.\n\n        Returns:\n            Tuple[torch.Tensor]:\n\n                - new_features: (B, out_c, npoint, K), features after PAConv.\n                - points_xyz: same as input.\n        \"\"\"\n        features, points_xyz = inputs\n        B, _, npoint, K = features.size()\n\n        if self.kernel_input == 'w_neighbor':\n            center_features = features[..., :1].repeat(1, 1, 1, K)\n            features_diff = features - center_features\n            # to (B, 2 * in_c, npoint, K)\n            features = torch.cat((features_diff, features), dim=1)\n\n        # prepare features for between each point and its grouping center\n        xyz_features = self._prepare_scorenet_input(points_xyz)\n\n        # scores to assemble kernel weights\n        scores = self.scorenet(xyz_features)  # [B, npoint, K, m]\n\n        # first compute out features over all kernels\n        # features is [B, C, npoint, K], weight_bank is [C, m * out_c]\n        new_features = torch.matmul(\n            features.permute(0, 2, 3, 1),\n            self.weight_bank).view(B, npoint, K, self.num_kernels,\n                                   -1)  # [B, npoint, K, m, out_c]\n\n        # then aggregate using scores\n        new_features = assign_score(scores, new_features)\n        # to [B, out_c, npoint, K]\n        
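# (assign_score returns [B, npoint, K, out_c]; permute to channel-first layout)\n        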
new_features = new_features.permute(0, 3, 1, 2).contiguous()\n\n        if self.bn is not None:\n            new_features = self.bn(new_features)\n        if self.activate is not None:\n            new_features = self.activate(new_features)\n\n        # in order to keep input output consistency\n        # so that we can wrap PAConv in Sequential\n        return (new_features, points_xyz)\n\n\nclass PAConvCUDA(PAConv):\n    \"\"\"CUDA version of PAConv that implements a cuda op to efficiently perform\n    kernel assembling.\n\n    Different from vanilla PAConv, the input features of this function is not\n    grouped by centers. Instead, they will be queried on-the-fly by the\n    additional input `points_idx`. This avoids the large intermediate matrix.\n    See the `paper <https://arxiv.org/pdf/2103.14635.pdf>`_ appendix Sec. D for\n    more detailed descriptions.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_kernels,\n                 norm_cfg=dict(type='BN2d', momentum=0.1),\n                 act_cfg=dict(type='ReLU', inplace=True),\n                 scorenet_input='w_neighbor_dist',\n                 weight_bank_init='kaiming',\n                 kernel_input='w_neighbor',\n                 scorenet_cfg=dict(\n                     mlp_channels=[8, 16, 16],\n                     score_norm='softmax',\n                     temp_factor=1.0,\n                     last_bn=False)):\n        super(PAConvCUDA, self).__init__(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            num_kernels=num_kernels,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg,\n            scorenet_input=scorenet_input,\n            weight_bank_init=weight_bank_init,\n            kernel_input=kernel_input,\n            scorenet_cfg=scorenet_cfg)\n\n        assert self.kernel_input == 'w_neighbor', \\\n            'CUDA implemented PAConv only supports w_neighbor kernel_input'\n\n    def forward(self, inputs):\n        \"\"\"Forward.\n\n        Args:\n            inputs (tuple(torch.Tensor)):\n\n                - features (torch.Tensor): (B, in_c, N)\n                    Features of all points in the current point cloud.\n                    Different from non-CUDA version PAConv, here the features\n                        are not grouped by each center to form a K dim.\n                - points_xyz (torch.Tensor): (B, 3, npoint, K)\n                    Coordinates of the grouped points.\n                - points_idx (torch.Tensor): (B, npoint, K)\n                    Index of the grouped points.\n\n        Returns:\n            Tuple[torch.Tensor]:\n\n                - new_features: (B, out_c, npoint, K), features after PAConv.\n                - points_xyz: same as input.\n                - points_idx: same as input.\n        \"\"\"\n        features, points_xyz, points_idx = inputs\n\n        # prepare features for between each point and its grouping center\n        xyz_features = self._prepare_scorenet_input(points_xyz)\n\n        # scores to assemble kernel weights\n        scores = self.scorenet(xyz_features)  # [B, npoint, K, m]\n\n        # pre-compute features for points and centers separately\n        # features is [B, in_c, N], weight_bank is [C, m * out_dim]\n        point_feat, center_feat = assign_kernel_withoutk(\n            features, self.weight_bank, self.num_kernels)\n\n        # aggregate features using custom cuda op\n        new_features = assign_score_cuda(\n            
scores, point_feat, center_feat, points_idx,\n            'sum').contiguous()  # [B, out_c, npoint, K]\n\n        if self.bn is not None:\n            new_features = self.bn(new_features)\n        if self.activate is not None:\n            new_features = self.activate(new_features)\n\n        # in order to keep input output consistency\n        return (new_features, points_xyz, points_idx)\n"
  },
  {
    "path": "mmdet3d/ops/paconv/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef calc_euclidian_dist(xyz1, xyz2):\n    \"\"\"Calculate the Euclidean distance between two sets of points.\n\n    Args:\n        xyz1 (torch.Tensor): (N, 3), the first set of points.\n        xyz2 (torch.Tensor): (N, 3), the second set of points.\n\n    Returns:\n        torch.Tensor: (N, ), the Euclidean distance between each point pair.\n    \"\"\"\n    assert xyz1.shape[0] == xyz2.shape[0], 'number of points are not the same'\n    assert xyz1.shape[1] == xyz2.shape[1] == 3, \\\n        'points coordinates dimension is not 3'\n    return torch.norm(xyz1 - xyz2, dim=-1)\n\n\ndef assign_score(scores, point_features):\n    \"\"\"Perform weighted sum to aggregate output features according to scores.\n    This function is used in non-CUDA version of PAConv.\n\n    Compared to the cuda op assigh_score_withk, this pytorch implementation\n        pre-computes output features for the neighbors of all centers, and then\n        performs aggregation. It consumes more GPU memories.\n\n    Args:\n        scores (torch.Tensor): (B, npoint, K, M), predicted scores to\n            aggregate weight matrices in the weight bank.\n            `npoint` is the number of sampled centers.\n            `K` is the number of queried neighbors.\n            `M` is the number of weight matrices in the weight bank.\n        point_features (torch.Tensor): (B, npoint, K, M, out_dim)\n            Pre-computed point features to be aggregated.\n\n    Returns:\n        torch.Tensor: (B, npoint, K, out_dim), the aggregated features.\n    \"\"\"\n    B, npoint, K, M = scores.size()\n    scores = scores.view(B, npoint, K, 1, M)\n    output = torch.matmul(scores, point_features).view(B, npoint, K, -1)\n    return output\n\n\ndef assign_kernel_withoutk(features, kernels, M):\n    \"\"\"Pre-compute features with weight matrices in weight bank. 
This function\n    is used before cuda op assign_score_withk in CUDA version PAConv.\n\n    Args:\n        features (torch.Tensor): (B, in_dim, N), input features of all points.\n            `N` is the number of points in current point cloud.\n        kernels (torch.Tensor): (2 * in_dim, M * out_dim), weight matrices in\n            the weight bank, transformed from (M, 2 * in_dim, out_dim).\n            `2 * in_dim` is because the input features are concatenation of\n            (point_features - center_features, point_features).\n        M (int): Number of weight matrices in the weight bank.\n\n    Returns:\n        Tuple[torch.Tensor]: both of shape (B, N, M, out_dim):\n\n            - point_features: Pre-computed features for points.\n            - center_features: Pre-computed features for centers.\n    \"\"\"\n    B, in_dim, N = features.size()\n    feat_trans = features.permute(0, 2, 1)  # [B, N, in_dim]\n    out_feat_half1 = torch.matmul(feat_trans, kernels[:in_dim]).view(\n        B, N, M, -1)  # [B, N, M, out_dim]\n    out_feat_half2 = torch.matmul(feat_trans, kernels[in_dim:]).view(\n        B, N, M, -1)  # [B, N, M, out_dim]\n\n    # TODO: why this hard-coded if condition?\n    # when the network input is only xyz without additional features\n    # xyz will be used as features, so that features.size(1) == 3 % 2 != 0\n    # we need to compensate center_features because otherwise\n    # `point_features - center_features` will result in all zeros?\n    if features.size(1) % 2 != 0:\n        out_feat_half_coord = torch.matmul(\n            feat_trans[:, :, :3],  # [B, N, 3]\n            kernels[in_dim:in_dim + 3]).view(B, N, M, -1)  # [B, N, M, out_dim]\n    else:\n        out_feat_half_coord = torch.zeros_like(out_feat_half2)\n\n    point_features = out_feat_half1 + out_feat_half2\n    center_features = out_feat_half1 + out_feat_half_coord\n    return point_features, center_features\n"
  },
  {
    "path": "mmdet3d/ops/pointnet_modules/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .builder import build_sa_module\nfrom .paconv_sa_module import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,\n                               PAConvSAModule, PAConvSAModuleMSG)\nfrom .point_fp_module import PointFPModule\nfrom .point_sa_module import PointSAModule, PointSAModuleMSG\n\n__all__ = [\n    'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule',\n    'PAConvSAModule', 'PAConvSAModuleMSG', 'PAConvCUDASAModule',\n    'PAConvCUDASAModuleMSG'\n]\n"
  },
  {
    "path": "mmdet3d/ops/pointnet_modules/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import Registry\n\nSA_MODULES = Registry('point_sa_module')\n\n\ndef build_sa_module(cfg, *args, **kwargs):\n    \"\"\"Build PointNet2 set abstraction (SA) module.\n\n    Args:\n        cfg (None or dict): The SA module config, which should contain:\n            - type (str): Module type.\n            - module args: Args needed to instantiate an SA module.\n        args (argument list): Arguments passed to the `__init__`\n            method of the corresponding module.\n        kwargs (keyword arguments): Keyword arguments passed to the `__init__`\n            method of the corresponding SA module .\n\n    Returns:\n        nn.Module: Created SA module.\n    \"\"\"\n    if cfg is None:\n        cfg_ = dict(type='PointSAModule')\n    else:\n        if not isinstance(cfg, dict):\n            raise TypeError('cfg must be a dict')\n        if 'type' not in cfg:\n            raise KeyError('the cfg dict must contain the key \"type\"')\n        cfg_ = cfg.copy()\n\n    module_type = cfg_.pop('type')\n    if module_type not in SA_MODULES:\n        raise KeyError(f'Unrecognized module type {module_type}')\n    else:\n        sa_module = SA_MODULES.get(module_type)\n\n    module = sa_module(*args, **kwargs, **cfg_)\n\n    return module\n"
  },
  {
    "path": "mmdet3d/ops/pointnet_modules/paconv_sa_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import PAConv, PAConvCUDA\nfrom .builder import SA_MODULES\nfrom .point_sa_module import BasePointSAModule\n\n\n@SA_MODULES.register_module()\nclass PAConvSAModuleMSG(BasePointSAModule):\n    r\"\"\"Point set abstraction module with multi-scale grouping (MSG) used in\n    PAConv networks.\n\n    Replace the MLPs in `PointSAModuleMSG` with PAConv layers.\n    See the `paper <https://arxiv.org/abs/2103.14635>`_ for more details.\n\n    Args:\n        paconv_num_kernels (list[list[int]]): Number of kernel weights in the\n            weight banks of each layer's PAConv.\n        paconv_kernel_input (str, optional): Input features to be multiplied\n            with kernel weights. Can be 'identity' or 'w_neighbor'.\n            Defaults to 'w_neighbor'.\n        scorenet_input (str, optional): Type of the input to ScoreNet.\n            Defaults to 'w_neighbor_dist'. Can be the following values:\n\n            - 'identity': Use xyz coordinates as input.\n            - 'w_neighbor': Use xyz coordinates and the difference with center\n                points as input.\n            - 'w_neighbor_dist': Use xyz coordinates, the difference with\n                center points and the Euclidean distance as input.\n\n        scorenet_cfg (dict, optional): Config of the ScoreNet module, which\n            may contain the following keys and values:\n\n            - mlp_channels (List[int]): Hidden units of MLPs.\n            - score_norm (str): Normalization function of output scores.\n                Can be 'softmax', 'sigmoid' or 'identity'.\n            - temp_factor (float): Temperature factor to scale the output\n                scores before softmax.\n            - last_bn (bool): Whether to use BN on the last output of mlps.\n    \"\"\"\n\n    def __init__(self,\n                 num_point,\n                 radii,\n                 sample_nums,\n                 mlp_channels,\n                 paconv_num_kernels,\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 dilated_group=False,\n                 norm_cfg=dict(type='BN2d', momentum=0.1),\n                 use_xyz=True,\n                 pool_mod='max',\n                 normalize_xyz=False,\n                 bias='auto',\n                 paconv_kernel_input='w_neighbor',\n                 scorenet_input='w_neighbor_dist',\n                 scorenet_cfg=dict(\n                     mlp_channels=[16, 16, 16],\n                     score_norm='softmax',\n                     temp_factor=1.0,\n                     last_bn=False)):\n        super(PAConvSAModuleMSG, self).__init__(\n            num_point=num_point,\n            radii=radii,\n            sample_nums=sample_nums,\n            mlp_channels=mlp_channels,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            dilated_group=dilated_group,\n            use_xyz=use_xyz,\n            pool_mod=pool_mod,\n            normalize_xyz=normalize_xyz,\n            grouper_return_grouped_xyz=True)\n\n        assert len(paconv_num_kernels) == len(mlp_channels)\n        for i in range(len(mlp_channels)):\n            assert len(paconv_num_kernels[i]) == len(mlp_channels[i]) - 1, \\\n                'PAConv number of kernel weights wrong'\n\n        # in PAConv, bias only exists in ScoreNet\n        scorenet_cfg['bias'] = bias\n\n        for i in range(len(self.mlp_channels)):\n       
     mlp_channel = self.mlp_channels[i]\n            if use_xyz:\n                mlp_channel[0] += 3\n\n            num_kernels = paconv_num_kernels[i]\n\n            mlp = nn.Sequential()\n            for i in range(len(mlp_channel) - 1):\n                mlp.add_module(\n                    f'layer{i}',\n                    PAConv(\n                        mlp_channel[i],\n                        mlp_channel[i + 1],\n                        num_kernels[i],\n                        norm_cfg=norm_cfg,\n                        kernel_input=paconv_kernel_input,\n                        scorenet_input=scorenet_input,\n                        scorenet_cfg=scorenet_cfg))\n            self.mlps.append(mlp)\n\n\n@SA_MODULES.register_module()\nclass PAConvSAModule(PAConvSAModuleMSG):\n    r\"\"\"Point set abstraction module with single-scale grouping (SSG) used in\n    PAConv networks.\n\n    Replace the MLPs in `PointSAModule` with PAConv layers. See the `paper\n    <https://arxiv.org/abs/2103.14635>`_ for more details.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 paconv_num_kernels,\n                 num_point=None,\n                 radius=None,\n                 num_sample=None,\n                 norm_cfg=dict(type='BN2d', momentum=0.1),\n                 use_xyz=True,\n                 pool_mod='max',\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 normalize_xyz=False,\n                 paconv_kernel_input='w_neighbor',\n                 scorenet_input='w_neighbor_dist',\n                 scorenet_cfg=dict(\n                     mlp_channels=[16, 16, 16],\n                     score_norm='softmax',\n                     temp_factor=1.0,\n                     last_bn=False)):\n        super(PAConvSAModule, self).__init__(\n            mlp_channels=[mlp_channels],\n            paconv_num_kernels=[paconv_num_kernels],\n            num_point=num_point,\n            radii=[radius],\n            sample_nums=[num_sample],\n            norm_cfg=norm_cfg,\n            use_xyz=use_xyz,\n            pool_mod=pool_mod,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            normalize_xyz=normalize_xyz,\n            paconv_kernel_input=paconv_kernel_input,\n            scorenet_input=scorenet_input,\n            scorenet_cfg=scorenet_cfg)\n\n\n@SA_MODULES.register_module()\nclass PAConvCUDASAModuleMSG(BasePointSAModule):\n    r\"\"\"Point set abstraction module with multi-scale grouping (MSG) used in\n    PAConv networks.\n\n    Replace the non CUDA version PAConv with CUDA implemented PAConv for\n    efficient computation. 
See the `paper <https://arxiv.org/abs/2103.14635>`_\n    for more details.\n    \"\"\"\n\n    def __init__(self,\n                 num_point,\n                 radii,\n                 sample_nums,\n                 mlp_channels,\n                 paconv_num_kernels,\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 dilated_group=False,\n                 norm_cfg=dict(type='BN2d', momentum=0.1),\n                 use_xyz=True,\n                 pool_mod='max',\n                 normalize_xyz=False,\n                 bias='auto',\n                 paconv_kernel_input='w_neighbor',\n                 scorenet_input='w_neighbor_dist',\n                 scorenet_cfg=dict(\n                     mlp_channels=[8, 16, 16],\n                     score_norm='softmax',\n                     temp_factor=1.0,\n                     last_bn=False)):\n        super(PAConvCUDASAModuleMSG, self).__init__(\n            num_point=num_point,\n            radii=radii,\n            sample_nums=sample_nums,\n            mlp_channels=mlp_channels,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            dilated_group=dilated_group,\n            use_xyz=use_xyz,\n            pool_mod=pool_mod,\n            normalize_xyz=normalize_xyz,\n            grouper_return_grouped_xyz=True,\n            grouper_return_grouped_idx=True)\n\n        assert len(paconv_num_kernels) == len(mlp_channels)\n        for i in range(len(mlp_channels)):\n            assert len(paconv_num_kernels[i]) == len(mlp_channels[i]) - 1, \\\n                'PAConv number of kernel weights wrong'\n\n        # in PAConv, bias only exists in ScoreNet\n        scorenet_cfg['bias'] = bias\n\n        # we need to manually concat xyz for CUDA implemented PAConv\n        self.use_xyz = use_xyz\n\n        for i in range(len(self.mlp_channels)):\n            mlp_channel = self.mlp_channels[i]\n            if use_xyz:\n                mlp_channel[0] += 3\n\n            num_kernels = paconv_num_kernels[i]\n\n            # can't use `nn.Sequential` for PAConvCUDA because its input and\n            # output have different shapes\n            mlp = nn.ModuleList()\n            for i in range(len(mlp_channel) - 1):\n                mlp.append(\n                    PAConvCUDA(\n                        mlp_channel[i],\n                        mlp_channel[i + 1],\n                        num_kernels[i],\n                        norm_cfg=norm_cfg,\n                        kernel_input=paconv_kernel_input,\n                        scorenet_input=scorenet_input,\n                        scorenet_cfg=scorenet_cfg))\n            self.mlps.append(mlp)\n\n    def forward(\n        self,\n        points_xyz,\n        features=None,\n        indices=None,\n        target_xyz=None,\n    ):\n        \"\"\"forward.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            features (Tensor, optional): (B, C, N) features of each point.\n                Default: None.\n            indices (Tensor, optional): (B, num_point) Index of the features.\n                Default: None.\n            target_xyz (Tensor, optional): (B, M, 3) new coords of the outputs.\n                Default: None.\n\n        Returns:\n            Tensor: (B, M, 3) where M is the number of points.\n                New features xyz.\n            Tensor: (B, M, sum_k(mlps[k][-1])) where M is the number\n                of points. 
New feature descriptors.\n            Tensor: (B, M) where M is the number of points.\n                Index of the features.\n        \"\"\"\n        new_features_list = []\n\n        # sample points, (B, num_point, 3), (B, num_point)\n        new_xyz, indices = self._sample_points(points_xyz, features, indices,\n                                               target_xyz)\n\n        for i in range(len(self.groupers)):\n            xyz = points_xyz\n            new_features = features\n            for j in range(len(self.mlps[i])):\n                # we don't use grouped_features here to avoid large GPU memory\n                # _, (B, 3, num_point, nsample), (B, num_point, nsample)\n                _, grouped_xyz, grouped_idx = self.groupers[i](xyz, new_xyz,\n                                                               new_features)\n\n                # concat xyz as additional features\n                if self.use_xyz and j == 0:\n                    # (B, C+3, N)\n                    new_features = torch.cat(\n                        (points_xyz.permute(0, 2, 1), new_features), dim=1)\n\n                # (B, out_c, num_point, nsample)\n                grouped_new_features = self.mlps[i][j](\n                    (new_features, grouped_xyz, grouped_idx.long()))[0]\n\n                # different from PointNet++ and non CUDA version of PAConv\n                # CUDA version of PAConv needs to aggregate local features\n                # every time after it passes through a Conv layer\n                # in order to transform to valid input shape\n                # (B, out_c, num_point)\n                new_features = self._pool_features(grouped_new_features)\n\n                # constrain the points to be grouped for next PAConv layer\n                # because new_features only contains sampled centers now\n                # (B, num_point, 3)\n                xyz = new_xyz\n\n            new_features_list.append(new_features)\n\n        return new_xyz, torch.cat(new_features_list, dim=1), indices\n\n\n@SA_MODULES.register_module()\nclass PAConvCUDASAModule(PAConvCUDASAModuleMSG):\n    r\"\"\"Point set abstraction module with single-scale grouping (SSG) used in\n    PAConv networks.\n\n    Replace the non CUDA version PAConv with CUDA implemented PAConv for\n    efficient computation. 
See the `paper <https://arxiv.org/abs/2103.14635>`_\n    for more details.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 paconv_num_kernels,\n                 num_point=None,\n                 radius=None,\n                 num_sample=None,\n                 norm_cfg=dict(type='BN2d', momentum=0.1),\n                 use_xyz=True,\n                 pool_mod='max',\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 normalize_xyz=False,\n                 paconv_kernel_input='w_neighbor',\n                 scorenet_input='w_neighbor_dist',\n                 scorenet_cfg=dict(\n                     mlp_channels=[8, 16, 16],\n                     score_norm='softmax',\n                     temp_factor=1.0,\n                     last_bn=False)):\n        super(PAConvCUDASAModule, self).__init__(\n            mlp_channels=[mlp_channels],\n            paconv_num_kernels=[paconv_num_kernels],\n            num_point=num_point,\n            radii=[radius],\n            sample_nums=[num_sample],\n            norm_cfg=norm_cfg,\n            use_xyz=use_xyz,\n            pool_mod=pool_mod,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            normalize_xyz=normalize_xyz,\n            paconv_kernel_input=paconv_kernel_input,\n            scorenet_input=scorenet_input,\n            scorenet_cfg=scorenet_cfg)\n"
  },
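  {
    "path": "examples/paconv_sa_module_usage.py",
    "content": "# Illustrative usage sketch; this file is not part of the original repository.\n# It builds the single-scale PAConvSAModule defined above and runs it on random\n# points. All shapes, channel sizes and kernel counts are arbitrary assumptions,\n# and the forward pass needs the compiled mmcv/mmdet3d CUDA ops and a GPU.\nimport torch\n\nfrom mmdet3d.ops.pointnet_modules.paconv_sa_module import PAConvSAModule\n\nif __name__ == '__main__':\n    xyz = torch.rand(2, 1024, 3).cuda()      # (B, N, 3) point coordinates\n    feats = torch.rand(2, 16, 1024).cuda()   # (B, C, N) per-point features\n\n    # mlp_channels[0] is the input feature dim (3 is added internally for xyz);\n    # len(paconv_num_kernels) must be len(mlp_channels) - 1.\n    sa = PAConvSAModule(\n        mlp_channels=[16, 32, 64],\n        paconv_num_kernels=[8, 8],\n        num_point=256,\n        radius=0.2,\n        num_sample=32).cuda()\n\n    new_xyz, new_feats, indices = sa(xyz, feats)\n    # new_xyz: (2, 256, 3), new_feats: (2, 64, 256), indices: (2, 256)\n"
  },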
  {
    "path": "mmdet3d/ops/pointnet_modules/point_fp_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom typing import List\n\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import three_interpolate, three_nn\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch import nn as nn\n\n\nclass PointFPModule(BaseModule):\n    \"\"\"Point feature propagation module used in PointNets.\n\n    Propagate the features from one set to another.\n\n    Args:\n        mlp_channels (list[int]): List of mlp channels.\n        norm_cfg (dict, optional): Type of normalization method.\n            Default: dict(type='BN2d').\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels: List[int],\n                 norm_cfg: dict = dict(type='BN2d'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.fp16_enabled = False\n        self.mlps = nn.Sequential()\n        for i in range(len(mlp_channels) - 1):\n            self.mlps.add_module(\n                f'layer{i}',\n                ConvModule(\n                    mlp_channels[i],\n                    mlp_channels[i + 1],\n                    kernel_size=(1, 1),\n                    stride=(1, 1),\n                    conv_cfg=dict(type='Conv2d'),\n                    norm_cfg=norm_cfg))\n\n    @force_fp32()\n    def forward(self, target: torch.Tensor, source: torch.Tensor,\n                target_feats: torch.Tensor,\n                source_feats: torch.Tensor) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            target (Tensor): (B, n, 3) tensor of the xyz positions of\n                the target features.\n            source (Tensor): (B, m, 3) tensor of the xyz positions of\n                the source features.\n            target_feats (Tensor): (B, C1, n) tensor of the features to be\n                propagated to.\n            source_feats (Tensor): (B, C2, m) tensor of features\n                to be propagated.\n\n        Return:\n            Tensor: (B, M, N) M = mlp[-1], tensor of the target features.\n        \"\"\"\n        if source is not None:\n            dist, idx = three_nn(target, source)\n            dist_reciprocal = 1.0 / (dist + 1e-8)\n            norm = torch.sum(dist_reciprocal, dim=2, keepdim=True)\n            weight = dist_reciprocal / norm\n\n            interpolated_feats = three_interpolate(source_feats, idx, weight)\n        else:\n            interpolated_feats = source_feats.expand(*source_feats.size()[0:2],\n                                                     target.size(1))\n\n        if target_feats is not None:\n            new_features = torch.cat([interpolated_feats, target_feats],\n                                     dim=1)  # (B, C2 + C1, n)\n        else:\n            new_features = interpolated_feats\n\n        new_features = new_features.unsqueeze(-1)\n        new_features = self.mlps(new_features)\n\n        return new_features.squeeze(-1)\n"
  },
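  {
    "path": "examples/point_fp_module_usage.py",
    "content": "# Illustrative usage sketch; this file is not part of the original repository.\n# It propagates features from a sparse source set to a denser target set with\n# PointFPModule. Shapes are arbitrary assumptions; the mmcv three_nn and\n# three_interpolate ops used inside require a GPU.\nimport torch\n\nfrom mmdet3d.ops.pointnet_modules.point_fp_module import PointFPModule\n\nif __name__ == '__main__':\n    target_xyz = torch.rand(2, 1024, 3).cuda()     # (B, n, 3) points to propagate to\n    source_xyz = torch.rand(2, 256, 3).cuda()      # (B, m, 3) points carrying features\n    target_feats = torch.rand(2, 16, 1024).cuda()  # (B, C1, n)\n    source_feats = torch.rand(2, 32, 256).cuda()   # (B, C2, m)\n\n    # mlp_channels[0] must equal C1 + C2 because the interpolated source\n    # features are concatenated with the target features before the MLP.\n    fp = PointFPModule(mlp_channels=[16 + 32, 64]).cuda()\n    new_feats = fp(target_xyz, source_xyz, target_feats, source_feats)\n    # new_feats: (2, 64, 1024)\n"
  },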
  {
    "path": "mmdet3d/ops/pointnet_modules/point_sa_module.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import GroupAll\nfrom mmcv.ops import PointsSampler as Points_Sampler\nfrom mmcv.ops import QueryAndGroup, gather_points\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.ops import PAConv\nfrom .builder import SA_MODULES\n\n\nclass BasePointSAModule(nn.Module):\n    \"\"\"Base module for point set abstraction module used in PointNets.\n\n    Args:\n        num_point (int): Number of points.\n        radii (list[float]): List of radius in each ball query.\n        sample_nums (list[int]): Number of samples in each ball query.\n        mlp_channels (list[list[int]]): Specify of the pointnet before\n            the global pooling for each scale.\n        fps_mod (list[str], optional): Type of FPS method, valid mod\n            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].\n            F-FPS: using feature distances for FPS.\n            D-FPS: using Euclidean distances of points for FPS.\n            FS: using F-FPS and D-FPS simultaneously.\n        fps_sample_range_list (list[int], optional):\n            Range of points to apply FPS. Default: [-1].\n        dilated_group (bool, optional): Whether to use dilated ball query.\n            Default: False.\n        use_xyz (bool, optional): Whether to use xyz.\n            Default: True.\n        pool_mod (str, optional): Type of pooling method.\n            Default: 'max_pool'.\n        normalize_xyz (bool, optional): Whether to normalize local XYZ\n            with radius. Default: False.\n        grouper_return_grouped_xyz (bool, optional): Whether to return\n            grouped xyz in `QueryAndGroup`. Defaults to False.\n        grouper_return_grouped_idx (bool, optional): Whether to return\n            grouped idx in `QueryAndGroup`. 
Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 num_point,\n                 radii,\n                 sample_nums,\n                 mlp_channels,\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 dilated_group=False,\n                 use_xyz=True,\n                 pool_mod='max',\n                 normalize_xyz=False,\n                 grouper_return_grouped_xyz=False,\n                 grouper_return_grouped_idx=False):\n        super(BasePointSAModule, self).__init__()\n\n        assert len(radii) == len(sample_nums) == len(mlp_channels)\n        assert pool_mod in ['max', 'avg']\n        assert isinstance(fps_mod, list) or isinstance(fps_mod, tuple)\n        assert isinstance(fps_sample_range_list, list) or isinstance(\n            fps_sample_range_list, tuple)\n        assert len(fps_mod) == len(fps_sample_range_list)\n\n        if isinstance(mlp_channels, tuple):\n            mlp_channels = list(map(list, mlp_channels))\n        self.mlp_channels = mlp_channels\n\n        if isinstance(num_point, int):\n            self.num_point = [num_point]\n        elif isinstance(num_point, list) or isinstance(num_point, tuple):\n            self.num_point = num_point\n        elif num_point is None:\n            self.num_point = None\n        else:\n            raise NotImplementedError('Error type of num_point!')\n\n        self.pool_mod = pool_mod\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        self.fps_mod_list = fps_mod\n        self.fps_sample_range_list = fps_sample_range_list\n\n        if self.num_point is not None:\n            self.points_sampler = Points_Sampler(self.num_point,\n                                                 self.fps_mod_list,\n                                                 self.fps_sample_range_list)\n        else:\n            self.points_sampler = None\n\n        for i in range(len(radii)):\n            radius = radii[i]\n            sample_num = sample_nums[i]\n            if num_point is not None:\n                if dilated_group and i != 0:\n                    min_radius = radii[i - 1]\n                else:\n                    min_radius = 0\n                grouper = QueryAndGroup(\n                    radius,\n                    sample_num,\n                    min_radius=min_radius,\n                    use_xyz=use_xyz,\n                    normalize_xyz=normalize_xyz,\n                    return_grouped_xyz=grouper_return_grouped_xyz,\n                    return_grouped_idx=grouper_return_grouped_idx)\n            else:\n                grouper = GroupAll(use_xyz)\n            self.groupers.append(grouper)\n\n    def _sample_points(self, points_xyz, features, indices, target_xyz):\n        \"\"\"Perform point sampling based on inputs.\n\n        If `indices` is specified, directly sample corresponding points.\n        Else if `target_xyz` is specified, use is as sampled points.\n        Otherwise sample points using `self.points_sampler`.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            features (Tensor): (B, C, N) features of each point.\n            indices (Tensor): (B, num_point) Index of the features.\n            target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.\n\n        Returns:\n            Tensor: (B, num_point, 3) sampled xyz coordinates of points.\n            Tensor: (B, num_point) sampled points' index.\n        \"\"\"\n        xyz_flipped = 
points_xyz.transpose(1, 2).contiguous()\n        if indices is not None:\n            assert (indices.shape[1] == self.num_point[0])\n            new_xyz = gather_points(xyz_flipped, indices).transpose(\n                1, 2).contiguous() if self.num_point is not None else None\n        elif target_xyz is not None:\n            new_xyz = target_xyz.contiguous()\n        else:\n            if self.num_point is not None:\n                indices = self.points_sampler(points_xyz, features)\n                new_xyz = gather_points(xyz_flipped,\n                                        indices).transpose(1, 2).contiguous()\n            else:\n                new_xyz = None\n\n        return new_xyz, indices\n\n    def _pool_features(self, features):\n        \"\"\"Perform feature aggregation using pooling operation.\n\n        Args:\n            features (torch.Tensor): (B, C, N, K)\n                Features of locally grouped points before pooling.\n\n        Returns:\n            torch.Tensor: (B, C, N)\n                Pooled features aggregating local information.\n        \"\"\"\n        if self.pool_mod == 'max':\n            # (B, C, N, 1)\n            new_features = F.max_pool2d(\n                features, kernel_size=[1, features.size(3)])\n        elif self.pool_mod == 'avg':\n            # (B, C, N, 1)\n            new_features = F.avg_pool2d(\n                features, kernel_size=[1, features.size(3)])\n        else:\n            raise NotImplementedError\n\n        return new_features.squeeze(-1).contiguous()\n\n    def forward(\n        self,\n        points_xyz,\n        features=None,\n        indices=None,\n        target_xyz=None,\n    ):\n        \"\"\"forward.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            features (Tensor, optional): (B, C, N) features of each point.\n                Default: None.\n            indices (Tensor, optional): (B, num_point) Index of the features.\n                Default: None.\n            target_xyz (Tensor, optional): (B, M, 3) new coords of the outputs.\n                Default: None.\n\n        Returns:\n            Tensor: (B, M, 3) where M is the number of points.\n                New features xyz.\n            Tensor: (B, M, sum_k(mlps[k][-1])) where M is the number\n                of points. 
New feature descriptors.\n            Tensor: (B, M) where M is the number of points.\n                Index of the features.\n        \"\"\"\n        new_features_list = []\n\n        # sample points, (B, num_point, 3), (B, num_point)\n        new_xyz, indices = self._sample_points(points_xyz, features, indices,\n                                               target_xyz)\n\n        for i in range(len(self.groupers)):\n            # grouped_results may contain:\n            # - grouped_features: (B, C, num_point, nsample)\n            # - grouped_xyz: (B, 3, num_point, nsample)\n            # - grouped_idx: (B, num_point, nsample)\n            grouped_results = self.groupers[i](points_xyz, new_xyz, features)\n\n            # (B, mlp[-1], num_point, nsample)\n            new_features = self.mlps[i](grouped_results)\n\n            # this is a bit hack because PAConv outputs two values\n            # we take the first one as feature\n            if isinstance(self.mlps[i][0], PAConv):\n                assert isinstance(new_features, tuple)\n                new_features = new_features[0]\n\n            # (B, mlp[-1], num_point)\n            new_features = self._pool_features(new_features)\n            new_features_list.append(new_features)\n\n        return new_xyz, torch.cat(new_features_list, dim=1), indices\n\n\n@SA_MODULES.register_module()\nclass PointSAModuleMSG(BasePointSAModule):\n    \"\"\"Point set abstraction module with multi-scale grouping (MSG) used in\n    PointNets.\n\n    Args:\n        num_point (int): Number of points.\n        radii (list[float]): List of radius in each ball query.\n        sample_nums (list[int]): Number of samples in each ball query.\n        mlp_channels (list[list[int]]): Specify of the pointnet before\n            the global pooling for each scale.\n        fps_mod (list[str], optional): Type of FPS method, valid mod\n            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].\n            F-FPS: using feature distances for FPS.\n            D-FPS: using Euclidean distances of points for FPS.\n            FS: using F-FPS and D-FPS simultaneously.\n        fps_sample_range_list (list[int], optional): Range of points to\n            apply FPS. Default: [-1].\n        dilated_group (bool, optional): Whether to use dilated ball query.\n            Default: False.\n        norm_cfg (dict, optional): Type of normalization method.\n            Default: dict(type='BN2d').\n        use_xyz (bool, optional): Whether to use xyz.\n            Default: True.\n        pool_mod (str, optional): Type of pooling method.\n            Default: 'max_pool'.\n        normalize_xyz (bool, optional): Whether to normalize local XYZ\n            with radius. Default: False.\n        bias (bool | str, optional): If specified as `auto`, it will be\n            decided by `norm_cfg`. `bias` will be set as True if\n            `norm_cfg` is None, otherwise False. 
Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 num_point,\n                 radii,\n                 sample_nums,\n                 mlp_channels,\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 dilated_group=False,\n                 norm_cfg=dict(type='BN2d'),\n                 use_xyz=True,\n                 pool_mod='max',\n                 normalize_xyz=False,\n                 bias='auto'):\n        super(PointSAModuleMSG, self).__init__(\n            num_point=num_point,\n            radii=radii,\n            sample_nums=sample_nums,\n            mlp_channels=mlp_channels,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            dilated_group=dilated_group,\n            use_xyz=use_xyz,\n            pool_mod=pool_mod,\n            normalize_xyz=normalize_xyz)\n\n        for i in range(len(self.mlp_channels)):\n            mlp_channel = self.mlp_channels[i]\n            if use_xyz:\n                mlp_channel[0] += 3\n\n            mlp = nn.Sequential()\n            for i in range(len(mlp_channel) - 1):\n                mlp.add_module(\n                    f'layer{i}',\n                    ConvModule(\n                        mlp_channel[i],\n                        mlp_channel[i + 1],\n                        kernel_size=(1, 1),\n                        stride=(1, 1),\n                        conv_cfg=dict(type='Conv2d'),\n                        norm_cfg=norm_cfg,\n                        bias=bias))\n            self.mlps.append(mlp)\n\n\n@SA_MODULES.register_module()\nclass PointSAModule(PointSAModuleMSG):\n    \"\"\"Point set abstraction module with single-scale grouping (SSG) used in\n    PointNets.\n\n    Args:\n        mlp_channels (list[int]): Specify of the pointnet before\n            the global pooling for each scale.\n        num_point (int, optional): Number of points.\n            Default: None.\n        radius (float, optional): Radius to group with.\n            Default: None.\n        num_sample (int, optional): Number of samples in each ball query.\n            Default: None.\n        norm_cfg (dict, optional): Type of normalization method.\n            Default: dict(type='BN2d').\n        use_xyz (bool, optional): Whether to use xyz.\n            Default: True.\n        pool_mod (str, optional): Type of pooling method.\n            Default: 'max_pool'.\n        fps_mod (list[str], optional): Type of FPS method, valid mod\n            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].\n        fps_sample_range_list (list[int], optional): Range of points\n            to apply FPS. Default: [-1].\n        normalize_xyz (bool, optional): Whether to normalize local XYZ\n            with radius. 
Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels,\n                 num_point=None,\n                 radius=None,\n                 num_sample=None,\n                 norm_cfg=dict(type='BN2d'),\n                 use_xyz=True,\n                 pool_mod='max',\n                 fps_mod=['D-FPS'],\n                 fps_sample_range_list=[-1],\n                 normalize_xyz=False):\n        super(PointSAModule, self).__init__(\n            mlp_channels=[mlp_channels],\n            num_point=num_point,\n            radii=[radius],\n            sample_nums=[num_sample],\n            norm_cfg=norm_cfg,\n            use_xyz=use_xyz,\n            pool_mod=pool_mod,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            normalize_xyz=normalize_xyz)\n"
  },
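  {
    "path": "examples/point_sa_module_usage.py",
    "content": "# Illustrative usage sketch; this file is not part of the original repository.\n# It runs single-scale set abstraction (PointSAModule) on random points. Shapes\n# and hyper-parameters are arbitrary assumptions; the mmcv ball-query, FPS and\n# gather_points ops require a GPU.\nimport torch\n\nfrom mmdet3d.ops.pointnet_modules.point_sa_module import PointSAModule\n\nif __name__ == '__main__':\n    xyz = torch.rand(2, 1024, 3).cuda()      # (B, N, 3)\n    feats = torch.rand(2, 16, 1024).cuda()   # (B, C, N)\n\n    # With use_xyz=True (default), 3 is added to mlp_channels[0] internally, so\n    # the grouped input to the first conv has C + 3 channels.\n    sa = PointSAModule(\n        mlp_channels=[16, 32, 64],\n        num_point=256,\n        radius=0.2,\n        num_sample=32).cuda()\n\n    new_xyz, new_feats, indices = sa(xyz, feats)\n    # new_xyz: (2, 256, 3), new_feats: (2, 64, 256), indices: (2, 256)\n"
  },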
  {
    "path": "mmdet3d/ops/sparse_block.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom torch import nn\n\nfrom mmdet.models.backbones.resnet import BasicBlock, Bottleneck\nfrom .spconv import IS_SPCONV2_AVAILABLE\n\nif IS_SPCONV2_AVAILABLE:\n    from spconv.pytorch import SparseModule, SparseSequential\nelse:\n    from mmcv.ops import SparseModule, SparseSequential\n\n\ndef replace_feature(out, new_features):\n    if 'replace_feature' in out.__dir__():\n        # spconv 2.x behaviour\n        return out.replace_feature(new_features)\n    else:\n        out.features = new_features\n        return out\n\n\nclass SparseBottleneck(Bottleneck, SparseModule):\n    \"\"\"Sparse bottleneck block for PartA^2.\n\n    Bottleneck block implemented with submanifold sparse convolution.\n\n    Args:\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        stride (int, optional): stride of the first block. Default: 1.\n        downsample (Module, optional): down sample module for block.\n        conv_cfg (dict, optional): dictionary to construct and config conv\n            layer. Default: None.\n        norm_cfg (dict, optional): dictionary to construct and config norm\n            layer. Default: dict(type='BN').\n    \"\"\"\n\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 downsample=None,\n                 conv_cfg=None,\n                 norm_cfg=None):\n\n        SparseModule.__init__(self)\n        Bottleneck.__init__(\n            self,\n            inplanes,\n            planes,\n            stride=stride,\n            downsample=downsample,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg)\n\n    def forward(self, x):\n        identity = x.features\n\n        out = self.conv1(x)\n        out = replace_feature(out, self.bn1(out.features))\n        out = replace_feature(out, self.relu(out.features))\n\n        out = self.conv2(out)\n        out = replace_feature(out, self.bn2(out.features))\n        out = replace_feature(out, self.relu(out.features))\n\n        out = self.conv3(out)\n        out = replace_feature(out, self.bn3(out.features))\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = replace_feature(out, out.features + identity)\n        out = replace_feature(out, self.relu(out.features))\n\n        return out\n\n\nclass SparseBasicBlock(BasicBlock, SparseModule):\n    \"\"\"Sparse basic block for PartA^2.\n\n    Sparse basic block implemented with submanifold sparse convolution.\n\n    Args:\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        stride (int, optional): stride of the first block. Default: 1.\n        downsample (Module, optional): down sample module for block.\n        conv_cfg (dict, optional): dictionary to construct and config conv\n            layer. Default: None.\n        norm_cfg (dict, optional): dictionary to construct and config norm\n            layer. 
Default: dict(type='BN').\n    \"\"\"\n\n    expansion = 1\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 downsample=None,\n                 conv_cfg=None,\n                 norm_cfg=None):\n        SparseModule.__init__(self)\n        BasicBlock.__init__(\n            self,\n            inplanes,\n            planes,\n            stride=stride,\n            downsample=downsample,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg)\n\n    def forward(self, x):\n        identity = x.features\n\n        assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'\n        out = self.conv1(x)\n        out = replace_feature(out, self.norm1(out.features))\n        out = replace_feature(out, self.relu(out.features))\n\n        out = self.conv2(out)\n        out = replace_feature(out, self.norm2(out.features))\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = replace_feature(out, out.features + identity)\n        out = replace_feature(out, self.relu(out.features))\n\n        return out\n\n\ndef make_sparse_convmodule(in_channels,\n                           out_channels,\n                           kernel_size,\n                           indice_key,\n                           stride=1,\n                           padding=0,\n                           conv_type='SubMConv3d',\n                           norm_cfg=None,\n                           order=('conv', 'norm', 'act')):\n    \"\"\"Make sparse convolution module.\n\n    Args:\n        in_channels (int): the number of input channels\n        out_channels (int): the number of out channels\n        kernel_size (int|tuple(int)): kernel size of convolution\n        indice_key (str): the indice key used for sparse tensor\n        stride (int|tuple(int)): the stride of convolution\n        padding (int or list[int]): the padding number of input\n        conv_type (str): sparse conv type in spconv\n        norm_cfg (dict[str]): config of normalization layer\n        order (tuple[str]): The order of conv/norm/activation layers. It is a\n            sequence of \"conv\", \"norm\" and \"act\". 
Common examples are\n            (\"conv\", \"norm\", \"act\") and (\"act\", \"conv\", \"norm\").\n\n    Returns:\n        spconv.SparseSequential: sparse convolution module.\n    \"\"\"\n    assert isinstance(order, tuple) and len(order) <= 3\n    assert set(order) | {'conv', 'norm', 'act'} == {'conv', 'norm', 'act'}\n\n    conv_cfg = dict(type=conv_type, indice_key=indice_key)\n\n    layers = list()\n    for layer in order:\n        if layer == 'conv':\n            if conv_type not in [\n                    'SparseInverseConv3d', 'SparseInverseConv2d',\n                    'SparseInverseConv1d'\n            ]:\n                layers.append(\n                    build_conv_layer(\n                        conv_cfg,\n                        in_channels,\n                        out_channels,\n                        kernel_size,\n                        stride=stride,\n                        padding=padding,\n                        bias=False))\n            else:\n                layers.append(\n                    build_conv_layer(\n                        conv_cfg,\n                        in_channels,\n                        out_channels,\n                        kernel_size,\n                        bias=False))\n        elif layer == 'norm':\n            layers.append(build_norm_layer(norm_cfg, out_channels)[1])\n        elif layer == 'act':\n            layers.append(nn.ReLU(inplace=True))\n\n    layers = SparseSequential(*layers)\n    return layers\n"
  },
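  {
    "path": "examples/sparse_block_usage.py",
    "content": "# Illustrative usage sketch; this file is not part of the original repository.\n# It builds a (conv, norm, act) submanifold sparse-conv block with\n# make_sparse_convmodule, in the same way the sparse middle encoders configure\n# it. Channel sizes and the indice_key are assumptions; spconv (1.x via mmcv or\n# 2.x) must be installed.\nfrom mmdet3d.ops.sparse_block import make_sparse_convmodule\n\nif __name__ == '__main__':\n    block = make_sparse_convmodule(\n        in_channels=16,\n        out_channels=32,\n        kernel_size=3,\n        indice_key='subm1',\n        padding=1,\n        conv_type='SubMConv3d',\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n        order=('conv', 'norm', 'act'))\n    # block is a SparseSequential(SubMConv3d, BatchNorm1d, ReLU) that consumes\n    # and produces spconv SparseConvTensor objects.\n    print(block)\n"
  },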
  {
    "path": "mmdet3d/ops/spconv/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .overwrite_spconv.write_spconv2 import register_spconv2\n\ntry:\n    import spconv\nexcept ImportError:\n    IS_SPCONV2_AVAILABLE = False\nelse:\n    if hasattr(spconv, '__version__') and spconv.__version__ >= '2.0.0':\n        IS_SPCONV2_AVAILABLE = register_spconv2()\n    else:\n        IS_SPCONV2_AVAILABLE = False\n\n__all__ = ['IS_SPCONV2_AVAILABLE']\n"
  },
  {
    "path": "mmdet3d/ops/spconv/overwrite_spconv/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .write_spconv2 import register_spconv2\n\n__all__ = ['register_spconv2']\n"
  },
  {
    "path": "mmdet3d/ops/spconv/overwrite_spconv/write_spconv2.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport itertools\n\nfrom mmcv.cnn.bricks.registry import CONV_LAYERS\nfrom torch.nn.parameter import Parameter\n\n\ndef register_spconv2():\n    \"\"\"This func registers spconv2.0 spconv ops to overwrite the default mmcv\n    spconv ops.\"\"\"\n    try:\n        from spconv.pytorch import (SparseConv2d, SparseConv3d, SparseConv4d,\n                                    SparseConvTranspose2d,\n                                    SparseConvTranspose3d, SparseInverseConv2d,\n                                    SparseInverseConv3d, SparseModule,\n                                    SubMConv2d, SubMConv3d, SubMConv4d)\n    except ImportError:\n        return False\n    else:\n        CONV_LAYERS._register_module(SparseConv2d, 'SparseConv2d', force=True)\n        CONV_LAYERS._register_module(SparseConv3d, 'SparseConv3d', force=True)\n        CONV_LAYERS._register_module(SparseConv4d, 'SparseConv4d', force=True)\n\n        CONV_LAYERS._register_module(\n            SparseConvTranspose2d, 'SparseConvTranspose2d', force=True)\n        CONV_LAYERS._register_module(\n            SparseConvTranspose3d, 'SparseConvTranspose3d', force=True)\n\n        CONV_LAYERS._register_module(\n            SparseInverseConv2d, 'SparseInverseConv2d', force=True)\n        CONV_LAYERS._register_module(\n            SparseInverseConv3d, 'SparseInverseConv3d', force=True)\n\n        CONV_LAYERS._register_module(SubMConv2d, 'SubMConv2d', force=True)\n        CONV_LAYERS._register_module(SubMConv3d, 'SubMConv3d', force=True)\n        CONV_LAYERS._register_module(SubMConv4d, 'SubMConv4d', force=True)\n        SparseModule._version = 2\n        SparseModule._load_from_state_dict = _load_from_state_dict\n        return True\n\n\ndef _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\n                          missing_keys, unexpected_keys, error_msgs):\n    \"\"\"Rewrite this func to compat the convolutional kernel weights between\n    spconv 1.x in MMCV and 2.x in spconv2.x.\n\n    Kernel weights in MMCV spconv has shape in (D,H,W,in_channel,out_channel) ,\n    while those in spcon2.x is in (out_channel,D,H,W,in_channel).\n    \"\"\"\n    version = local_metadata.get('version', None)\n    for hook in self._load_state_dict_pre_hooks.values():\n        hook(state_dict, prefix, local_metadata, strict, missing_keys,\n             unexpected_keys, error_msgs)\n\n    local_name_params = itertools.chain(self._parameters.items(),\n                                        self._buffers.items())\n    local_state = {k: v.data for k, v in local_name_params if v is not None}\n\n    for name, param in local_state.items():\n        key = prefix + name\n        if key in state_dict:\n            input_param = state_dict[key]\n\n            # Backward compatibility: loading 1-dim tensor from\n            # 0.3.* to version 0.4+\n            if len(param.shape) == 0 and len(input_param.shape) == 1:\n                input_param = input_param[0]\n            if version != 2:\n                dims = [len(input_param.shape) - 1] + list(\n                    range(len(input_param.shape) - 1))\n                input_param = input_param.permute(*dims)\n            if input_param.shape != param.shape:\n                # local shape should match the one in checkpoint\n                error_msgs.append(\n                    f'size mismatch for {key}: copying a param with '\n                    f'shape {key, input_param.shape} from checkpoint,'\n                    f'the 
shape in current model is {param.shape}.')\n                continue\n\n            if isinstance(input_param, Parameter):\n                # backwards compatibility for serialized parameters\n                input_param = input_param.data\n            try:\n                param.copy_(input_param)\n            except Exception:\n                error_msgs.append(\n                    f'While copying the parameter named \"{key}\", whose '\n                    f'dimensions in the model are {param.size()} and whose '\n                    f'dimensions in the checkpoint are {input_param.size()}.')\n        elif strict:\n            missing_keys.append(key)\n\n    if strict:\n        for key, input_param in state_dict.items():\n            if key.startswith(prefix):\n                input_name = key[len(prefix):]\n                input_name = input_name.split(\n                    '.', 1)[0]  # get the name of param/buffer/child\n                if input_name not in self._modules \\\n                        and input_name not in local_state:\n                    unexpected_keys.append(key)\n"
  },
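  {
    "path": "examples/spconv2_weight_layout_demo.py",
    "content": "# Illustrative sketch; this file is not part of the original repository. It\n# demonstrates the kernel-weight layout conversion performed by the rewritten\n# _load_from_state_dict above: mmcv/spconv 1.x checkpoints store conv kernels\n# as (D, H, W, in, out), while spconv 2.x modules expect (out, D, H, W, in).\n# The sizes below are arbitrary.\nimport torch\n\nif __name__ == '__main__':\n    w_v1 = torch.rand(3, 3, 3, 16, 32)   # (D, H, W, in, out) from a 1.x checkpoint\n    dims = [w_v1.dim() - 1] + list(range(w_v1.dim() - 1))\n    w_v2 = w_v1.permute(*dims)            # (out, D, H, W, in)\n    assert tuple(w_v2.shape) == (32, 3, 3, 3, 16)\n"
  },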
  {
    "path": "mmdet3d/utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import Registry, build_from_cfg, print_log\n\nfrom .collect_env import collect_env\nfrom .compat_cfg import compat_cfg\nfrom .logger import get_root_logger\nfrom .misc import find_latest_checkpoint\nfrom .setup_env import setup_multi_processes\n\n__all__ = [\n    'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env',\n    'print_log', 'setup_multi_processes', 'find_latest_checkpoint',\n    'compat_cfg'\n]\n"
  },
  {
    "path": "mmdet3d/utils/collect_env.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import collect_env as collect_base_env\nfrom mmcv.utils import get_git_hash\n\nimport mmdet\nimport mmdet3d\nimport mmseg\nfrom mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE\n\n\ndef collect_env():\n    \"\"\"Collect the information of the running environments.\"\"\"\n    env_info = collect_base_env()\n    env_info['MMDetection'] = mmdet.__version__\n    env_info['MMSegmentation'] = mmseg.__version__\n    env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7]\n    env_info['spconv2.0'] = IS_SPCONV2_AVAILABLE\n    return env_info\n\n\nif __name__ == '__main__':\n    for name, val in collect_env().items():\n        print(f'{name}: {val}')\n"
  },
  {
    "path": "mmdet3d/utils/compat_cfg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport warnings\n\nfrom mmcv import ConfigDict\n\n\ndef compat_cfg(cfg):\n    \"\"\"This function would modify some filed to keep the compatibility of\n    config.\n\n    For example, it will move some args which will be deprecated to the correct\n    fields.\n    \"\"\"\n    cfg = copy.deepcopy(cfg)\n    cfg = compat_imgs_per_gpu(cfg)\n    cfg = compat_loader_args(cfg)\n    cfg = compat_runner_args(cfg)\n    return cfg\n\n\ndef compat_runner_args(cfg):\n    if 'runner' not in cfg:\n        cfg.runner = ConfigDict({\n            'type': 'EpochBasedRunner',\n            'max_epochs': cfg.total_epochs\n        })\n        warnings.warn(\n            'config is now expected to have a `runner` section, '\n            'please set `runner` in your config.', UserWarning)\n    else:\n        if 'total_epochs' in cfg:\n            assert cfg.total_epochs == cfg.runner.max_epochs\n    return cfg\n\n\ndef compat_imgs_per_gpu(cfg):\n    cfg = copy.deepcopy(cfg)\n    if 'imgs_per_gpu' in cfg.data:\n        warnings.warn('\"imgs_per_gpu\" is deprecated in MMDet V2.0. '\n                      'Please use \"samples_per_gpu\" instead')\n        if 'samples_per_gpu' in cfg.data:\n            warnings.warn(\n                f'Got \"imgs_per_gpu\"={cfg.data.imgs_per_gpu} and '\n                f'\"samples_per_gpu\"={cfg.data.samples_per_gpu}, \"imgs_per_gpu\"'\n                f'={cfg.data.imgs_per_gpu} is used in this experiments')\n        else:\n            warnings.warn('Automatically set \"samples_per_gpu\"=\"imgs_per_gpu\"='\n                          f'{cfg.data.imgs_per_gpu} in this experiments')\n        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu\n    return cfg\n\n\ndef compat_loader_args(cfg):\n    \"\"\"Deprecated sample_per_gpu in cfg.data.\"\"\"\n\n    cfg = copy.deepcopy(cfg)\n    if 'train_dataloader' not in cfg.data:\n        cfg.data['train_dataloader'] = ConfigDict()\n    if 'val_dataloader' not in cfg.data:\n        cfg.data['val_dataloader'] = ConfigDict()\n    if 'test_dataloader' not in cfg.data:\n        cfg.data['test_dataloader'] = ConfigDict()\n\n    # special process for train_dataloader\n    if 'samples_per_gpu' in cfg.data:\n\n        samples_per_gpu = cfg.data.pop('samples_per_gpu')\n        assert 'samples_per_gpu' not in \\\n               cfg.data.train_dataloader, ('`samples_per_gpu` are set '\n                                           'in `data` field and ` '\n                                           'data.train_dataloader` '\n                                           'at the same time. '\n                                           'Please only set it in '\n                                           '`data.train_dataloader`. ')\n        cfg.data.train_dataloader['samples_per_gpu'] = samples_per_gpu\n\n    if 'persistent_workers' in cfg.data:\n\n        persistent_workers = cfg.data.pop('persistent_workers')\n        assert 'persistent_workers' not in \\\n               cfg.data.train_dataloader, ('`persistent_workers` are set '\n                                           'in `data` field and ` '\n                                           'data.train_dataloader` '\n                                           'at the same time. '\n                                           'Please only set it in '\n                                           '`data.train_dataloader`. 
')\n        cfg.data.train_dataloader['persistent_workers'] = persistent_workers\n\n    if 'workers_per_gpu' in cfg.data:\n\n        workers_per_gpu = cfg.data.pop('workers_per_gpu')\n        cfg.data.train_dataloader['workers_per_gpu'] = workers_per_gpu\n        cfg.data.val_dataloader['workers_per_gpu'] = workers_per_gpu\n        cfg.data.test_dataloader['workers_per_gpu'] = workers_per_gpu\n\n    # special process for val_dataloader\n    if 'samples_per_gpu' in cfg.data.val:\n        # keep default value of `sample_per_gpu` is 1\n        assert 'samples_per_gpu' not in \\\n               cfg.data.val_dataloader, ('`samples_per_gpu` are set '\n                                         'in `data.val` field and ` '\n                                         'data.val_dataloader` at '\n                                         'the same time. '\n                                         'Please only set it in '\n                                         '`data.val_dataloader`. ')\n        cfg.data.val_dataloader['samples_per_gpu'] = \\\n            cfg.data.val.pop('samples_per_gpu')\n    # special process for val_dataloader\n\n    # in case the test dataset is concatenated\n    if isinstance(cfg.data.test, dict):\n        if 'samples_per_gpu' in cfg.data.test:\n            assert 'samples_per_gpu' not in \\\n                   cfg.data.test_dataloader, ('`samples_per_gpu` are set '\n                                              'in `data.test` field and ` '\n                                              'data.test_dataloader` '\n                                              'at the same time. '\n                                              'Please only set it in '\n                                              '`data.test_dataloader`. ')\n\n            cfg.data.test_dataloader['samples_per_gpu'] = \\\n                cfg.data.test.pop('samples_per_gpu')\n\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            if 'samples_per_gpu' in ds_cfg:\n                assert 'samples_per_gpu' not in \\\n                       cfg.data.test_dataloader, ('`samples_per_gpu` are set '\n                                                  'in `data.test` field and ` '\n                                                  'data.test_dataloader` at'\n                                                  ' the same time. '\n                                                  'Please only set it in '\n                                                  '`data.test_dataloader`. ')\n        samples_per_gpu = max(\n            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])\n        cfg.data.test_dataloader['samples_per_gpu'] = samples_per_gpu\n\n    return cfg\n"
  },
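  {
    "path": "examples/compat_cfg_usage.py",
    "content": "# Illustrative usage sketch; this file is not part of the original repository.\n# It shows how compat_cfg moves the deprecated top-level samples_per_gpu and\n# workers_per_gpu settings into the per-split dataloader fields. The config\n# values are arbitrary assumptions.\nfrom mmcv import Config\n\nfrom mmdet3d.utils import compat_cfg\n\nif __name__ == '__main__':\n    cfg = Config(\n        dict(\n            data=dict(\n                samples_per_gpu=4,\n                workers_per_gpu=2,\n                train=dict(),\n                val=dict(),\n                test=dict()),\n            runner=dict(type='EpochBasedRunner', max_epochs=24)))\n\n    cfg = compat_cfg(cfg)\n    # samples_per_gpu now lives in cfg.data.train_dataloader and\n    # workers_per_gpu is copied to the train/val/test dataloader fields.\n    print(cfg.data.train_dataloader)  # {'samples_per_gpu': 4, 'workers_per_gpu': 2}\n    print(cfg.data.val_dataloader)    # {'workers_per_gpu': 2}\n"
  },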
  {
    "path": "mmdet3d/utils/logger.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport logging\n\nfrom mmcv.utils import get_logger\n\n\ndef get_root_logger(log_file=None, log_level=logging.INFO, name='mmdet3d'):\n    \"\"\"Get root logger and add a keyword filter to it.\n\n    The logger will be initialized if it has not been initialized. By default a\n    StreamHandler will be added. If `log_file` is specified, a FileHandler will\n    also be added. The name of the root logger is the top-level package name,\n    e.g., \"mmdet3d\".\n\n    Args:\n        log_file (str, optional): File path of log. Defaults to None.\n        log_level (int, optional): The level of logger.\n            Defaults to logging.INFO.\n        name (str, optional): The name of the root logger, also used as a\n            filter keyword. Defaults to 'mmdet3d'.\n\n    Returns:\n        :obj:`logging.Logger`: The obtained logger\n    \"\"\"\n    logger = get_logger(name=name, log_file=log_file, log_level=log_level)\n\n    # add a logging filter\n    logging_filter = logging.Filter(name)\n    logging_filter.filter = lambda record: record.find(name) != -1\n\n    return logger\n"
  },
  {
    "path": "mmdet3d/utils/misc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport glob\nimport os.path as osp\nimport warnings\n\n\ndef find_latest_checkpoint(path, suffix='pth'):\n    \"\"\"Find the latest checkpoint from the working directory. This function is\n    copied from mmdetection.\n\n    Args:\n        path(str): The path to find checkpoints.\n        suffix(str): File extension.\n            Defaults to pth.\n\n    Returns:\n        latest_path(str | None): File path of the latest checkpoint.\n    References:\n        .. [1] https://github.com/microsoft/SoftTeacher\n                  /blob/main/ssod/utils/patch.py\n    \"\"\"\n    if not osp.exists(path):\n        warnings.warn('The path of checkpoints does not exist.')\n        return None\n    if osp.exists(osp.join(path, f'latest.{suffix}')):\n        return osp.join(path, f'latest.{suffix}')\n\n    checkpoints = glob.glob(osp.join(path, f'*.{suffix}'))\n    if len(checkpoints) == 0:\n        warnings.warn('There are no checkpoints in the path.')\n        return None\n    latest = -1\n    latest_path = None\n    for checkpoint in checkpoints:\n        count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0])\n        if count > latest:\n            latest = count\n            latest_path = checkpoint\n    return latest_path\n"
  },
  {
    "path": "mmdet3d/utils/setup_env.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport platform\nimport warnings\n\nimport cv2\nfrom torch import multiprocessing as mp\n\n\ndef setup_multi_processes(cfg):\n    \"\"\"Setup multi-processing environment variables.\"\"\"\n    # set multi-process start method as `fork` to speed up the training\n    if platform.system() != 'Windows':\n        mp_start_method = cfg.get('mp_start_method', 'fork')\n        current_method = mp.get_start_method(allow_none=True)\n        if current_method is not None and current_method != mp_start_method:\n            warnings.warn(\n                f'Multi-processing start method `{mp_start_method}` is '\n                f'different from the previous setting `{current_method}`.'\n                f'It will be force set to `{mp_start_method}`. You can change '\n                f'this behavior by changing `mp_start_method` in your config.')\n        mp.set_start_method(mp_start_method, force=True)\n\n    # disable opencv multithreading to avoid system being overloaded\n    opencv_num_threads = cfg.get('opencv_num_threads', 0)\n    cv2.setNumThreads(opencv_num_threads)\n\n    # setup OMP threads\n    # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py  # noqa\n    workers_per_gpu = cfg.data.get('workers_per_gpu', 1)\n    if 'train_dataloader' in cfg.data:\n        workers_per_gpu = \\\n            max(cfg.data.train_dataloader.get('workers_per_gpu', 1),\n                workers_per_gpu)\n\n    if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1:\n        omp_num_threads = 1\n        warnings.warn(\n            f'Setting OMP_NUM_THREADS environment variable for each process '\n            f'to be {omp_num_threads} in default, to avoid your system being '\n            f'overloaded, please further tune the variable for optimal '\n            f'performance in your application as needed.')\n        os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)\n\n    # setup MKL threads\n    if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1:\n        mkl_num_threads = 1\n        warnings.warn(\n            f'Setting MKL_NUM_THREADS environment variable for each process '\n            f'to be {mkl_num_threads} in default, to avoid your system being '\n            f'overloaded, please further tune the variable for optimal '\n            f'performance in your application as needed.')\n        os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)\n"
  },
  {
    "path": "mmdet3d/version.py",
    "content": "# Copyright (c) Open-MMLab. All rights reserved.\n\n__version__ = '1.0.0rc4'\nshort_version = __version__\n\n\ndef parse_version_info(version_str):\n    version_info = []\n    for x in version_str.split('.'):\n        if x.isdigit():\n            version_info.append(int(x))\n        elif x.find('rc') != -1:\n            patch_version = x.split('rc')\n            version_info.append(int(patch_version[0]))\n            version_info.append(f'rc{patch_version[1]}')\n    return tuple(version_info)\n\n\nversion_info = parse_version_info(__version__)\n"
  },
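  {
    "path": "examples/parse_version_info_demo.py",
    "content": "# Illustrative sketch; this file is not part of the original repository. It\n# shows how parse_version_info in mmdet3d/version.py splits a version string,\n# including release-candidate suffixes.\nfrom mmdet3d.version import parse_version_info\n\nif __name__ == '__main__':\n    assert parse_version_info('1.0.0rc4') == (1, 0, 0, 'rc4')\n    assert parse_version_info('1.0.0') == (1, 0, 0)\n"
  },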
  {
    "path": "requirements/build.txt",
    "content": ""
  },
  {
    "path": "requirements/docs.txt",
    "content": "docutils==0.16.0\nm2r\nmistune==0.8.4\nmyst-parser\n-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme\nsphinx==4.0.2\nsphinx-copybutton\nsphinx_markdown_tables\n"
  },
  {
    "path": "requirements/mminstall.txt",
    "content": "mmcv-full>=1.4.8,<=1.6.0\nmmdet>=2.24.0,<=3.0.0\nmmsegmentation>=0.20.0,<=1.0.0\n"
  },
  {
    "path": "requirements/optional.txt",
    "content": "open3d\nspconv\nwaymo-open-dataset-tf-2-1-0==1.2.0\n"
  },
  {
    "path": "requirements/readthedocs.txt",
    "content": "mmcv>=1.4.8\nmmdet>=2.24.0\nmmsegmentation>=0.20.1\ntorch\ntorchvision\n"
  },
  {
    "path": "requirements/runtime.txt",
    "content": "lyft_dataset_sdk\nnetworkx>=2.2,<2.3\nnumba==0.53.0\nnumpy\nnuscenes-devkit\nplyfile\nscikit-image\n# by default we also use tensorboard to log results\ntensorboard\ntrimesh>=2.35.39,<2.35.40\n"
  },
  {
    "path": "requirements/tests.txt",
    "content": "asynctest\ncodecov\nflake8\ninterrogate\nisort\n# Note: used for kwarray.group_items, this may be ported to mmcv in the future.\nkwarray\npytest\npytest-cov\npytest-runner\nubelt\nxdoctest >= 0.10.0\nyapf\n"
  },
  {
    "path": "tools/analysis_tools/analyze_logs.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport json\nfrom collections import defaultdict\n\nimport numpy as np\nimport seaborn as sns\nfrom matplotlib import pyplot as plt\n\n\ndef cal_train_time(log_dicts, args):\n    for i, log_dict in enumerate(log_dicts):\n        print(f'{\"-\" * 5}Analyze train time of {args.json_logs[i]}{\"-\" * 5}')\n        all_times = []\n        for epoch in log_dict.keys():\n            if args.include_outliers:\n                all_times.append(log_dict[epoch]['time'])\n            else:\n                all_times.append(log_dict[epoch]['time'][1:])\n        all_times = np.array(all_times)\n        epoch_ave_time = all_times.mean(-1)\n        slowest_epoch = epoch_ave_time.argmax()\n        fastest_epoch = epoch_ave_time.argmin()\n        std_over_epoch = epoch_ave_time.std()\n        print(f'slowest epoch {slowest_epoch + 1}, '\n              f'average time is {epoch_ave_time[slowest_epoch]:.4f}')\n        print(f'fastest epoch {fastest_epoch + 1}, '\n              f'average time is {epoch_ave_time[fastest_epoch]:.4f}')\n        print(f'time std over epochs is {std_over_epoch:.4f}')\n        print(f'average iter time: {np.mean(all_times):.4f} s/iter')\n        print()\n\n\ndef plot_curve(log_dicts, args):\n    if args.backend is not None:\n        plt.switch_backend(args.backend)\n    sns.set_style(args.style)\n    # if legend is None, use {filename}_{key} as legend\n    legend = args.legend\n    if legend is None:\n        legend = []\n        for json_log in args.json_logs:\n            for metric in args.keys:\n                legend.append(f'{json_log}_{metric}')\n    assert len(legend) == (len(args.json_logs) * len(args.keys))\n    metrics = args.keys\n\n    num_metrics = len(metrics)\n    for i, log_dict in enumerate(log_dicts):\n        epochs = list(log_dict.keys())\n        for j, metric in enumerate(metrics):\n            print(f'plot curve of {args.json_logs[i]}, metric is {metric}')\n            if metric not in log_dict[epochs[args.interval - 1]]:\n                raise KeyError(\n                    f'{args.json_logs[i]} does not contain metric {metric}')\n\n            if args.mode == 'eval':\n                if min(epochs) == args.interval:\n                    x0 = args.interval\n                else:\n                    # if current training is resumed from previous checkpoint\n                    # we lost information in early epochs\n                    # `xs` should start according to `min(epochs)`\n                    if min(epochs) % args.interval == 0:\n                        x0 = min(epochs)\n                    else:\n                        # find the first epoch that do eval\n                        x0 = min(epochs) + args.interval - \\\n                            min(epochs) % args.interval\n                xs = np.arange(x0, max(epochs) + 1, args.interval)\n                ys = []\n                for epoch in epochs[args.interval - 1::args.interval]:\n                    ys += log_dict[epoch][metric]\n\n                # if training is aborted before eval of the last epoch\n                # `xs` and `ys` will have different length and cause an error\n                # check if `ys[-1]` is empty here\n                if not log_dict[epoch][metric]:\n                    xs = xs[:-1]\n\n                ax = plt.gca()\n                ax.set_xticks(xs)\n                plt.xlabel('epoch')\n                plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')\n            
else:\n                xs = []\n                ys = []\n                num_iters_per_epoch = \\\n                    log_dict[epochs[args.interval-1]]['iter'][-1]\n                for epoch in epochs[args.interval - 1::args.interval]:\n                    iters = log_dict[epoch]['iter']\n                    if log_dict[epoch]['mode'][-1] == 'val':\n                        iters = iters[:-1]\n                    xs.append(\n                        np.array(iters) + (epoch - 1) * num_iters_per_epoch)\n                    ys.append(np.array(log_dict[epoch][metric][:len(iters)]))\n                xs = np.concatenate(xs)\n                ys = np.concatenate(ys)\n                plt.xlabel('iter')\n                plt.plot(\n                    xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)\n            plt.legend()\n        if args.title is not None:\n            plt.title(args.title)\n    if args.out is None:\n        plt.show()\n    else:\n        print(f'save curve to: {args.out}')\n        plt.savefig(args.out)\n        plt.cla()\n\n\ndef add_plot_parser(subparsers):\n    parser_plt = subparsers.add_parser(\n        'plot_curve', help='parser for plotting curves')\n    parser_plt.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_plt.add_argument(\n        '--keys',\n        type=str,\n        nargs='+',\n        default=['mAP_0.25'],\n        help='the metric that you want to plot')\n    parser_plt.add_argument('--title', type=str, help='title of figure')\n    parser_plt.add_argument(\n        '--legend',\n        type=str,\n        nargs='+',\n        default=None,\n        help='legend of each plot')\n    parser_plt.add_argument(\n        '--backend', type=str, default=None, help='backend of plt')\n    parser_plt.add_argument(\n        '--style', type=str, default='dark', help='style of plt')\n    parser_plt.add_argument('--out', type=str, default=None)\n    parser_plt.add_argument('--mode', type=str, default='train')\n    parser_plt.add_argument('--interval', type=int, default=1)\n\n\ndef add_time_parser(subparsers):\n    parser_time = subparsers.add_parser(\n        'cal_train_time',\n        help='parser for computing the average time per training iteration')\n    parser_time.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_time.add_argument(\n        '--include-outliers',\n        action='store_true',\n        help='include the first value of every epoch when computing '\n        'the average time')\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Analyze Json Log')\n    # currently only support plot curve and calculate average train time\n    subparsers = parser.add_subparsers(dest='task', help='task parser')\n    add_plot_parser(subparsers)\n    add_time_parser(subparsers)\n    args = parser.parse_args()\n    return args\n\n\ndef load_json_logs(json_logs):\n    # load and convert json_logs to log_dict, key is epoch, value is a sub dict\n    # keys of sub dict is different metrics, e.g. 
memory, bbox_mAP\n    # value of sub dict is a list of corresponding values of all iterations\n    log_dicts = [dict() for _ in json_logs]\n    for json_log, log_dict in zip(json_logs, log_dicts):\n        with open(json_log, 'r') as log_file:\n            for line in log_file:\n                log = json.loads(line.strip())\n                # skip lines without `epoch` field\n                if 'epoch' not in log:\n                    continue\n                epoch = log.pop('epoch')\n                if epoch not in log_dict:\n                    log_dict[epoch] = defaultdict(list)\n                for k, v in log.items():\n                    log_dict[epoch][k].append(v)\n    return log_dicts\n\n\ndef main():\n    args = parse_args()\n\n    json_logs = args.json_logs\n    for json_log in json_logs:\n        assert json_log.endswith('.json')\n\n    log_dicts = load_json_logs(json_logs)\n\n    eval(args.task)(log_dicts, args)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/benchmark.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport time\n\nimport torch\nfrom mmcv import Config\nfrom mmcv.parallel import MMDataParallel\nfrom mmcv.runner import load_checkpoint, wrap_fp16_model\n\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_detector\nfrom tools.misc.fuse_conv_bn import fuse_module\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet benchmark a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--samples', default=2000, help='samples to benchmark')\n    parser.add_argument(\n        '--log-interval', default=50, help='interval of logging')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--no-acceleration',\n        action='store_true',\n        help='Omit the pre-computation acceleration')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=1,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=False,\n        shuffle=False)\n\n    # build the model and load checkpoint\n    if not args.no_acceleration:\n        cfg.model.img_view_transformer.accelerate=True\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    # load_checkpoint(model, args.checkpoint, map_location='cpu')\n    if args.fuse_conv_bn:\n        model = fuse_module(model)\n\n    model = MMDataParallel(model, device_ids=[0])\n\n    model.eval()\n\n    # the first several iterations may be very slow so skip them\n    num_warmup = 5\n    pure_inf_time = 0\n\n    # benchmark with several samples and take the average\n    for i, data in enumerate(data_loader):\n\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n\n        with torch.no_grad():\n            model(return_loss=False, rescale=True, **data)\n\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % args.log_interval == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(f'Done image [{i + 1:<3}/ {args.samples}], '\n                      f'fps: {fps:.1f} img / s')\n\n        if (i + 1) == args.samples:\n            pure_inf_time += elapsed\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(f'Overall fps: {fps:.1f} img / s')\n            break\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/benchmark_sequential.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport time\n\nimport torch\nfrom mmcv import Config\nfrom mmcv.parallel import MMDataParallel\nfrom mmcv.runner import load_checkpoint, wrap_fp16_model\n\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_detector\nfrom tools.misc.fuse_conv_bn import fuse_module\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet benchmark a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--samples', default=400, help='samples to benchmark')\n    parser.add_argument(\n        '--log-interval', default=50, help='interval of logging')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--no-acceleration',\n        action='store_true',\n        help='Omit the pre-computation acceleration')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=1,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=False,\n        shuffle=False)\n\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    cfg.model.align_after_view_transfromation=True\n    if not args.no_acceleration:\n        cfg.model.img_view_transformer.accelerate=True\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    load_checkpoint(model, args.checkpoint, map_location='cpu')\n    if args.fuse_conv_bn:\n        model = fuse_module(model)\n\n    model = MMDataParallel(model, device_ids=[0])\n\n    model.eval()\n\n    # the first several iterations may be very slow so skip them\n    num_warmup = 5\n    pure_inf_time = 0\n\n    # benchmark with several samples and take the average\n    for i, data in enumerate(data_loader):\n        inputs = [d.cuda() for d in data['img_inputs'][0]]\n        with torch.no_grad():\n            feat_prev, inputs = model.module.extract_img_feat(\n                inputs, pred_prev=True, img_metas=None)\n        data['img_inputs'][0] = inputs\n\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n\n        with torch.no_grad():\n            model(\n                return_loss=False,\n                rescale=True,\n                sequential=True,\n                feat_prev=feat_prev,\n                **data)\n\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % args.log_interval == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(f'Done image [{i + 1:<3}/ {args.samples}], '\n                      f'fps: {fps:.1f} img / s')\n\n        if (i + 1) == args.samples:\n        
    pure_inf_time += elapsed\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(f'Overall fps: {fps:.1f} img / s')\n            break\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/benchmark_trt.py",
    "content": "import time\nfrom typing import Dict, Optional, Sequence, Union\n\nimport tensorrt as trt\nimport torch\nimport torch.onnx\nfrom mmcv import Config\nfrom mmdeploy.backend.tensorrt import load_tensorrt_plugin\n\ntry:\n    # If mmdet version > 2.23.0, compat_cfg would be imported and\n    # used from mmdet instead of mmdet3d.\n    from mmdet.utils import compat_cfg\nexcept ImportError:\n    from mmdet3d.utils import compat_cfg\n\nimport argparse\n\nfrom mmdet3d.core import bbox3d2result\nfrom mmdet3d.core.bbox.structures.box_3d_mode import LiDARInstance3DBoxes\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_model\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')\n    parser.add_argument('config', help='deploy config file path')\n    parser.add_argument('engine', help='checkpoint file')\n    parser.add_argument('--samples', default=500, help='samples to benchmark')\n    parser.add_argument('--postprocessing', action='store_true')\n    args = parser.parse_args()\n    return args\n\n\ndef torch_dtype_from_trt(dtype: trt.DataType) -> torch.dtype:\n    \"\"\"Convert pytorch dtype to TensorRT dtype.\n\n    Args:\n        dtype (str.DataType): The data type in tensorrt.\n\n    Returns:\n        torch.dtype: The corresponding data type in torch.\n    \"\"\"\n\n    if dtype == trt.bool:\n        return torch.bool\n    elif dtype == trt.int8:\n        return torch.int8\n    elif dtype == trt.int32:\n        return torch.int32\n    elif dtype == trt.float16:\n        return torch.float16\n    elif dtype == trt.float32:\n        return torch.float32\n    else:\n        raise TypeError(f'{dtype} is not supported by torch')\n\n\nclass TRTWrapper(torch.nn.Module):\n\n    def __init__(self,\n                 engine: Union[str, trt.ICudaEngine],\n                 output_names: Optional[Sequence[str]] = None) -> None:\n        super().__init__()\n        self.engine = engine\n        if isinstance(self.engine, str):\n            with trt.Logger() as logger, trt.Runtime(logger) as runtime:\n                with open(self.engine, mode='rb') as f:\n                    engine_bytes = f.read()\n                self.engine = runtime.deserialize_cuda_engine(engine_bytes)\n        self.context = self.engine.create_execution_context()\n        names = [_ for _ in self.engine]\n        input_names = list(filter(self.engine.binding_is_input, names))\n        self._input_names = input_names\n        self._output_names = output_names\n\n        if self._output_names is None:\n            output_names = list(set(names) - set(input_names))\n            self._output_names = output_names\n\n    def forward(self, inputs: Dict[str, torch.Tensor]):\n        bindings = [None] * (len(self._input_names) + len(self._output_names))\n        for input_name, input_tensor in inputs.items():\n            idx = self.engine.get_binding_index(input_name)\n            self.context.set_binding_shape(idx, tuple(input_tensor.shape))\n            bindings[idx] = input_tensor.contiguous().data_ptr()\n\n            # create output tensors\n        outputs = {}\n        for output_name in self._output_names:\n            idx = self.engine.get_binding_index(output_name)\n            dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))\n            shape = tuple(self.context.get_binding_shape(idx))\n\n            device = torch.device('cuda')\n            output = torch.zeros(size=shape, dtype=dtype, device=device)\n       
     outputs[output_name] = output\n            bindings[idx] = output.data_ptr()\n        self.context.execute_async_v2(bindings,\n                                      torch.cuda.current_stream().cuda_stream)\n        return outputs\n\n\ndef get_plugin_names():\n    return [pc.name for pc in trt.get_plugin_registry().plugin_creator_list]\n\n\ndef main():\n\n    load_tensorrt_plugin()\n\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    cfg.model.pretrained = None\n    cfg.model.type = cfg.model.type + 'TRT'\n    cfg = compat_cfg(cfg)\n    cfg.gpu_ids = [0]\n\n    # build dataloader\n    assert cfg.data.test.test_mode\n    test_dataloader_default_args = dict(\n        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)\n    test_loader_cfg = {\n        **test_dataloader_default_args,\n        **cfg.data.get('test_dataloader', {})\n    }\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(dataset, **test_loader_cfg)\n\n    # build the model\n    cfg.model.train_cfg = None\n    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))\n\n    # build tensorrt model\n    trt_model = TRTWrapper(args.engine, [f'output_{i}' for i in range(36)])\n\n    num_warmup = 50\n    pure_inf_time = 0\n\n    init_ = True\n    metas = dict()\n    # benchmark with several samples and take the average\n    for i, data in enumerate(data_loader):\n        if init_:\n            inputs = [t.cuda() for t in data['img_inputs'][0]]\n            metas_ = model.get_bev_pool_input(inputs)\n            metas = dict(\n                ranks_bev=metas_[0].int().contiguous(),\n                ranks_depth=metas_[1].int().contiguous(),\n                ranks_feat=metas_[2].int().contiguous(),\n                interval_starts=metas_[3].int().contiguous(),\n                interval_lengths=metas_[4].int().contiguous())\n            init_ = False\n        img = data['img_inputs'][0][0].cuda().squeeze(0).contiguous()\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n        trt_output = trt_model.forward(dict(img=img, **metas))\n\n        # postprocessing\n        if args.postprocessing:\n            trt_output = [trt_output[f'output_{i}'] for i in range(36)]\n            pred = model.result_deserialize(trt_output)\n            img_metas = [dict(box_type_3d=LiDARInstance3DBoxes)]\n            bbox_list = model.pts_bbox_head.get_bboxes(\n                pred, img_metas, rescale=True)\n            bbox_results = [\n                bbox3d2result(bboxes, scores, labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % 50 == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(f'Done image [{i + 1:<3}/ {args.samples}], '\n                      f'fps: {fps:.1f} img / s')\n\n        if (i + 1) == args.samples:\n            pure_inf_time += elapsed\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(f'Overall \\nfps: {fps:.1f} img / s '\n                  f'\\ninference time: {1000/fps:.1f} ms')\n            return fps\n\n\nif __name__ == '__main__':\n    fps = main()\n"
  },
  {
    "path": "tools/analysis_tools/benchmark_view_transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport time\n\nimport numpy as np\nimport torch\nfrom mmcv import Config\nfrom mmcv.parallel import MMDataParallel\nfrom mmcv.runner import load_checkpoint\n\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_detector\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet benchmark a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--samples', default=1000, help='samples to benchmark')\n    parser.add_argument(\n        '--log-interval', default=50, help='interval of logging')\n    parser.add_argument(\n        '--mem-only',\n        action='store_true',\n        help='Conduct the memory analysis only')\n    parser.add_argument(\n        '--no-acceleration',\n        action='store_true',\n        help='Omit the pre-computation acceleration')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=1,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=False,\n        shuffle=False)\n\n    # build the model and load checkpoint\n    if not args.no_acceleration:\n        cfg.model.img_view_transformer.accelerate=True\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    load_checkpoint(model, args.checkpoint, map_location='cpu')\n    model = MMDataParallel(model, device_ids=[0])\n\n    model.eval()\n\n    # the first several iterations may be very slow so skip them\n    num_warmup = 100\n    pure_inf_time = 0\n    D = model.module.img_view_transformer.D\n    out_channels = model.module.img_view_transformer.out_channels\n    depth_net = model.module.img_view_transformer.depth_net\n    view_transformer = model.module.img_view_transformer\n    # benchmark with several samples and take the average\n    for i, data in enumerate(data_loader):\n\n        with torch.no_grad():\n            img_feat = \\\n                model.module.image_encoder(data['img_inputs'][0][0].cuda())\n            B, N, C, H, W = img_feat.shape\n            x = depth_net(img_feat.reshape(B * N, C, H, W))\n            depth_digit = x[:, :D, ...]\n            tran_feat = x[:, D:D + out_channels, ...]\n            depth = depth_digit.softmax(dim=1)\n        input = [img_feat] + [d.cuda() for d in data['img_inputs'][0][1:]]\n\n        if i == 0:\n            precomputed_memory_allocated = 0.0\n            if view_transformer.accelerate:\n                start_mem_allocated = torch.cuda.memory_allocated()\n                view_transformer.pre_compute(input)\n                end_mem_allocated = torch.cuda.memory_allocated()\n                precomputed_memory_allocated = \\\n                    end_mem_allocated - start_mem_allocated\n                ref_max_mem_allocated = torch.cuda.max_memory_allocated()\n                # occupy the memory\n                size = (ref_max_mem_allocated - end_mem_allocated) // 4\n      
          occupy_tensor = torch.zeros(\n                    size=(size, ), device='cuda', dtype=torch.float32)\n            print('Memory analysis: \\n'\n                  'precomputed_memory_allocated : %d B / %.01f MB \\n' %\n                  (precomputed_memory_allocated,\n                   precomputed_memory_allocated / 1024 / 1024))\n            start_mem_allocated = torch.cuda.memory_allocated()\n            bev_feat = view_transformer.view_transform_core(\n                input, depth, tran_feat)[0]\n            end_max_mem_allocated = torch.cuda.max_memory_allocated()\n            peak_memory_allocated = \\\n                end_max_mem_allocated - start_mem_allocated\n            total_memory_requirement = \\\n                precomputed_memory_allocated + peak_memory_allocated\n            print('Memory analysis: \\n'\n                  'Memory requirement : %d B / %.01f MB \\n' %\n                  (total_memory_requirement,\n                   total_memory_requirement / 1024 / 1024))\n            if args.mem_only:\n                return\n\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n        with torch.no_grad():\n            view_transformer.view_transform(input, depth, tran_feat)[0]\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % args.log_interval == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(f'Done image [{i + 1:<3}/ {args.samples}], '\n                      f'fps: {fps:.1f} img / s')\n\n        if (i + 1) == args.samples:\n            pure_inf_time += elapsed\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(f'Overall fps: {fps:.1f} img / s')\n            return fps\n\n\nif __name__ == '__main__':\n    repeat_times = 1\n    fps_list = []\n    for _ in range(repeat_times):\n        fps = main()\n        time.sleep(5)\n        fps_list.append(fps)\n    fps_list = np.array(fps_list, dtype=np.float32)\n    print(f'Mean Overall fps: {fps_list.mean():.4f} +'\n          f' {np.sqrt(fps_list.var()):.4f} img / s')\n"
  },
  {
    "path": "tools/analysis_tools/create_video.py",
    "content": "import random as rd\nimport cv2 as cv\nimport numpy as np\n\n\n\nclass RecordMovie(object):\n\n    def __init__(self, img_width, img_height):\n        self.video_writer = None \n        self.is_end = False \n        self.img_width = img_width  \n        self.img_height = img_height \n\n   \n    def start(self, file_name, freq):\n\n        four_cc = cv.VideoWriter_fourcc(*'mp4v')\n        img_size = (self.img_width, self.img_height)  \n\n\n        self.video_writer = cv.VideoWriter()\n        self.video_writer.open(file_name, four_cc, freq, img_size, True)\n\n      \n    def record(self, img):\n        if self.is_end is False:\n            self.video_writer.write(img)\n\n\n    def end(self):\n        self.is_end = True\n        self.video_writer.release()\n\nimport os\nimport mmcv\ndef main_waymo():\n    rm = RecordMovie(200, 200)\n    rm.start(\"test_waymo.mp4\", 10)\n    # base_path = 'test/anchor_traintest_noflip_1.0/Fri_Jun__3_17_10_33_2022/show_dirs/testing_camera/image_0/'\n    files = os.listdir('/mount/data/lsbevv2/vis')\n    for i in range(320):\n       \n        imgs = cv.imread(os.path.join('/mount/data/lsbevv2/vis', f'a_{i}.png'))\n        print(i)\n        print(imgs.shape)\n        rm.record(imgs)\n    rm.end()\n\nif __name__ == '__main__':\n    #main_nuscenes()\n    main_waymo()"
  },
  {
    "path": "tools/analysis_tools/generate_mask_based_on_lidar_points.py",
    "content": "from mmdet3d.datasets import build_dataset\nimport mmcv\nfrom mmcv import Config, DictAction\nfrom mmdet3d.datasets import build_dataset\ncfg = Config.fromfile('/mount/data/lsbevv2/occupancy_configs/occupancy/debug.py')\ndataset = build_dataset(cfg.data.test, dict(test_mode=True))\nimport numpy as np\nimport torch\n\nimport numpy as np\nimport torch\nimport matplotlib.pyplot as plt\nimport cv2\nimport torch\nfrom torchvision.utils import make_grid\nimport torchvision\nimport matplotlib.pyplot as plt\nimport cv2\nimport json\nimport os\ndef convert_color(img_path):\n    plt.figure()\n    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)\n    plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))\n    plt.close()\n\n\ndef save_tensor(tensor, path, pad_value=254.0,normalize=False):\n    print('save_tensor', path)\n    tensor = tensor.to(torch.float).detach().cpu()\n    max_ = tensor.flatten(1).max(-1).values[:, None, None]\n    min_ = tensor.flatten(1).min(-1).values[:, None, None]\n    tensor = (tensor-min_)/(max_-min_)\n    if tensor.type() == 'torch.BoolTensor':\n        tensor = tensor*255\n    if len(tensor.shape) == 3:\n        tensor = tensor.unsqueeze(1)\n    tensor = make_grid(tensor, pad_value=pad_value, normalize=normalize).permute(1, 2, 0).numpy().copy()\n    torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)\n    convert_color(path)\n\n\ndef generate_forward_transformation_matrix(bda, img_meta_dict=None):\n    b = bda.size(0)\n    hom_res = torch.eye(4)[None].repeat(b, 1, 1).to(bda.device)\n    for i in range(b):\n        hom_res[i, :3, :3] = bda[i]\n    return hom_res\n\n\nfrom segment_anything import sam_model_registry, SamPredictor\n\n\n\n\ndef show_mask(mask, ax, random_color=False, cls_=None):\n    classname_to_color= {'ignore_class': (255, 255, 255),  \n                'barrier': (112, 128, 144),  # Slategrey\n                'bicycle': (220, 20, 60),  # Crimson\n                'bus': (255, 127, 80),  # Coral\n                'car': (255, 158, 0),  # Orange\n                'construction_vehicle': (233, 150, 70),  # Darksalmon\n                'motorcycle': (255, 61, 99),  # Red\n                'pedestrian': (0, 0, 230),  # Blue\n                'traffic_cone': (47, 79, 79),  # Darkslategrey\n                'trailer': (255, 140, 0),  # Darkorange\n                'truck': (255, 99, 71),  # Tomato\n                'driveable_surface': (0, 207, 191),  # nuTonomy green\n                'other_flat': (175, 0, 75),\n                'sidewalk': (75, 0, 75),\n                'terrain': (112, 180, 60),\n                'manmade': (222, 184, 135),  # Burlywood\n                'vegetation': (0, 175, 0)}\n\n    colors = np.array(list(classname_to_color.values())).astype(np.uint8)\n    alpha = np.ones((colors.shape[0], 1), dtype=np.uint8) * 0.5\n    colors = np.hstack([colors/255, alpha])\n    \n    if random_color:\n        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)\n    elif cls_ is not None:\n        color = colors[cls_]\n    else:\n        color = np.array([30/255, 144/255, 255/255, 0.6])\n    \n    h, w = mask.shape[-2:]\n    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)\n    ax.imshow(mask_image)\n    \ndef show_points(coords, labels, ax, marker_size=375):\n    pos_points = coords[labels==1]\n    neg_points = coords[labels==0]\n    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)\n    ax.scatter(neg_points[:, 0], 
neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)   \n    \ndef show_box(box, ax):\n    x0, y0 = box[0], box[1]\n    w, h = box[2] - box[0], box[3] - box[1]\n    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))    \n\nidx_to_name = mmcv.load('/mount/data/lsbevv2/data/nuscenes/v1.0-trainval/category.json')\nidx_to_name = [each['name'] for each in idx_to_name]\nname_category = {'animal':0,\n'human.pedestrian.personal_mobility':0,\n'human.pedestrian.stroller':0,\n'human.pedestrian.wheelchair':0,\n'movable_object.debris':0,\n'movable_object.pushable_pullable':0,\n'static_object.bicycle_rack':0,\n'vehicle.emergency.ambulance':0,\n'vehicle.emergency.police':0,\n'noise':0,\n'static.other':0,\n'vehicle.ego':0,\n'movable_object.barrier':1,\n'vehicle.bicycle':2,\n'vehicle.bus.bendy':3,\n'vehicle.bus.rigid':3,\n'vehicle.car':4,\n'vehicle.construction':5,\n'vehicle.motorcycle':6,\n'human.pedestrian.adult':7,\n'human.pedestrian.child':7,\n'human.pedestrian.construction_worker': 7,\n'human.pedestrian.police_officer':7,\n'movable_object.trafficcone': 8,\n'vehicle.trailer': 9,\n'vehicle.truck': 10,\n'flat.driveable_surface': 11,\n'flat.other': 12,\n'flat.sidewalk':  13,\n'flat.terrain':  14,\n'static.manmade':  15,\n'static.vegetation': 16}\n\nidx_to_category = [name_category[each] for each in idx_to_name]\n\nfrom segment_anything import sam_model_registry, SamPredictor\n\nsam_checkpoint = \"/mount/data/segment-anything/sam_vit_h_4b8939.pth\"\nmodel_type = \"vit_h\"\n\ndevice = \"cuda\"\n\nsam = sam_model_registry[model_type](checkpoint=sam_checkpoint)\nsam.to(device=device)\npredictor = SamPredictor(sam)\n\n# front_1 = './data/nuscenes/samples/CAM_FRONT_LEFT/n015-2018-07-11-11-54-16+0800__CAM_FRONT_LEFT__1531281439754844.jpg'\n# import cv2\n\n# image = cv2.imread(front_1)\n# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\nimport json\nfrom collections import defaultdict\nfile_path = '/mount/data/lsbevv2/data/nuscenes/bevdetv2-nuscenes_infos_val.coco.json'\ndata = json.load(open(file_path, 'r'))\ncategory_map_from_det_to_set = {\n        0:4,\n        1:10,\n        2:9,\n        3:3,\n        4:5,\n        5:2,\n        6:6,\n        7:7,\n        8:8,\n        9:1\n    }\nsample_map = defaultdict(lambda: [])\nimage_map = defaultdict(lambda: [])\nfor each in data['images']:\n    sample_map[each['token']].append(each['id'])\nfor i, each in enumerate(data['annotations']):\n    image_map[each['image_id']].append(i)\n    \n\nimport argparse\nimport random\nfrom tqdm import tqdm\ndef f(gap=0):\n    co = 0\n    for i in tqdm(range(gap, len(dataset))):\n        co +=1\n        print(i)\n        info = dataset[i]\n        category_map = info['gt_depth'][0]\n        for j in range(len(idx_to_category)):\n            category_map[category_map==j] = idx_to_category[j]\n        imgs =  info['img_inputs'][0][0]\n        cams = [\n            'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n            'CAM_BACK', 'CAM_BACK_RIGHT'\n        ]\n        for ind, img in enumerate(imgs):\n            img = img.permute(1, 2, 0).to(torch.uint8)\n            image = img.cpu().numpy()\n            predictor.set_image(image)\n            per_category_map = category_map[ind]\n\n            sample_data_token = info['img_metas'][0].data['curr']['cams'][cams[ind]]['sample_data_token']\n            # if os.path.isfile(f'/mount/data/lsbevv2/data/nus_sem/{sample_data_token}.png'): continue\n\n            bboxes 
=[data['annotations'][each_idx] for each_idx in image_map[sample_data_token]]\n            input_boxes = []\n            for bbox in bboxes:\n                bbox['category_id'] = category_map_from_det_to_set[bbox['category_id']]\n                x, y, w, h = bbox['bbox']\n                input_boxes.append([x, y, x+w, y+h])\n                # input_box = np.array([x, y, x+w, y+h]) # xyxy format\n            input_boxes = torch.tensor(input_boxes, device=predictor.device)\n            transformed_boxes = predictor.transform.apply_boxes_torch(input_boxes, image.shape[:2])\n            sem_masks = np.zeros([17, 900, 1600]) + 0.05\n            thing_mask = np.zeros([900, 1600])\n            if len(input_boxes)>0:\n                try:\n                    masks, scores, logits = predictor.predict_torch(\n                        point_coords=None,\n                        point_labels=None,\n                        boxes=torch.tensor(transformed_boxes).to(device),\n                        multimask_output=False,\n                        return_logits=False,\n                    )\n                    masks, scores = masks.squeeze(1).cpu().numpy(), scores.squeeze(1).cpu().numpy()\n\n                    for index, mask in enumerate(masks):\n                        id = bboxes[index]['category_id']\n                        sem_masks[id][mask] = scores[index] + 0.4 # 0.4 is the bias of bbox prompt campared  to point prompt\n                        thing_mask[mask] = 1\n                except:\n                    print(sample_data_token, ' thing error!!!!')\n\n            for stuff_class in [11, 12, 13, 14, 15, 16]:\n                points = torch.tensor((per_category_map == stuff_class).nonzero())\n                if points.size(0)==0: continue\n                else:\n                    xs = [each[0].item() for each in points]\n                    ys = [each[1].item() for each in points]\n                    points = points[thing_mask[xs, ys]==0]\n                    if points.size(0)==0: continue\n                    if points.size(0)<=5:\n                        points = random.choices(points, k=min(3, points.size(0)))\n                    else:\n                        try:\n                            y = points[:, 0].to(torch.float).mean()\n                            x = points[:, 1].to(torch.float).mean()\n                            right_up = random.choices(points[(points[:,0]>=y) & (points[:,1]>=x)], k=1)\n                            left_up =  random.choices(points[(points[:,0]<y) & (points[:,1]>=x)], k=1)\n                            right_bottom =  random.choices(points[(points[:,0]>=y) & (points[:,1]<x)], k=1)\n                            left_bottom =  random.choices(points[(points[:,0]<y) & (points[:,1]<x)], k=1)\n                            points = right_up + left_up + right_bottom + left_bottom\n                        except:\n                            points = random.choices(points, k=min(3, points.size(0)))\n\n                    input_point = np.array([each.cpu().numpy() for each in points])[:,::-1]\n                    input_label = np.array([stuff_class for _ in range(len(input_point))])\n                    try:\n                        masks, scores, logits = predictor.predict(\n                            point_coords=input_point,\n                            point_labels=input_label,\n                            multimask_output=False,\n                            return_logits=False,\n                        )\n                        
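# record the SAM confidence for this stuff class in its per-class score map\n                        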
sem_masks[stuff_class][masks[0]] = scores[0]\n                    except:\n                        print(sample_data_token, stuff_class, ' stuff_error')\n\n            sem_masks = torch.from_numpy(sem_masks).permute(1, 2, 0).argmax(-1).numpy()\n            # np.save(f'/mount/data/lsbevv2/data/nus_sem/{sample_data_token}.npy', mask=sem_masks.astype(np.uint8))\n            mmcv.imwrite(sem_masks, f'/mount/data/lsbevv2/data/nus_sem/{sample_data_token}.png')\n            # sem_masks_ = mmcv.imread( f'/mount/data/lsbevv2/data/nus_sem/{sample_data_token}.jpg', flag='grayscale')\n            # save_tensor(torch.tensor(sem_masks), 'tensor_{i}_{ind}.png'.format(i=i, ind=ind))\n            # # .permute(2, 0, 1).numpy()\n\n            # plt.figure(figsize=(10,10))\n            # plt.imshow(image)\n\n            # for p in range(17):     \n            #     print(p, (sem_masks==p).sum())   \n            #     show_mask(sem_masks==p, plt.gca(), random_color=False, cls_=p)\n            # plt.axis('off')\n            # f = plt.gcf()\n            # f.savefig('a_{i}_{ind}.png'.format(i=i, ind=ind))\n            # f.clear()\n\n    # if i==5:break\n                \n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description='gap')\n    parser.add_argument('gap', default=0, type=int, help='gap')\n    args = parser.parse_args()\n    f(gap=args.gap)\n\n"
  },
  {
    "path": "tools/analysis_tools/get_flops.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\n\nimport torch\nfrom mmcv import Config, DictAction\n\nfrom mmdet3d.models import build_model\n\ntry:\n    from mmcv.cnn import get_model_complexity_info\nexcept ImportError:\n    raise ImportError('Please upgrade mmcv to >0.6.2')\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument(\n        '--shape',\n        type=int,\n        nargs='+',\n        default=[40000, 4],\n        help='input point cloud size')\n    parser.add_argument(\n        '--modality',\n        type=str,\n        default='point',\n        choices=['point', 'image', 'multi'],\n        help='input data modality')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n    return args\n\n\ndef construct_input(input_shape):\n    rot = torch.eye(3).float().cuda().view(1, 3, 3)\n    rot = torch.cat([rot for _ in range(6)], axis=0).view(1, 6, 3, 3)\n\n    input = dict(img_inputs=[\n        torch.ones(()).new_empty((1, 6, 3, *input_shape)).cuda(), rot,\n        torch.ones((1, 6, 3)).cuda(), rot, rot,\n        torch.ones((1, 6, 3)).cuda(),\n        torch.eye(3).float().cuda().view(1, 3, 3)\n    ])\n    return input\n\n\ndef main():\n\n    args = parse_args()\n\n    if args.modality == 'point':\n        assert len(args.shape) == 2, 'invalid input shape'\n        input_shape = tuple(args.shape)\n    elif args.modality == 'image':\n        if len(args.shape) == 1:\n            input_shape = (3, args.shape[0], args.shape[0])\n        elif len(args.shape) == 2:\n            input_shape = (3, ) + tuple(args.shape)\n        else:\n            raise ValueError('invalid input shape')\n    elif args.modality == 'multi':\n        raise NotImplementedError(\n            'FLOPs counter is currently not supported for models with '\n            'multi-modality input')\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    model = build_model(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    if torch.cuda.is_available():\n        model.cuda()\n    model.eval()\n\n    if hasattr(model, 'forward_dummy'):\n        model.forward = model.forward_dummy\n    else:\n        raise NotImplementedError(\n            'FLOPs counter is currently not supported for {}'.format(\n                model.__class__.__name__))\n\n    flops, params = get_model_complexity_info(\n        model, input_shape, input_constructor=construct_input)\n    split_line = '=' * 30\n    print(f'{split_line}\\nInput shape: {input_shape}\\n'\n          f'Flops: {flops}\\nParams: {params}\\n{split_line}')\n    print('!!!Please be cautious if you use the results in papers. '\n          'You may need to check if all ops are supported and verify that the '\n          'flops computation is correct.')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/model_converter.py",
    "content": "import torch\n\nmodel = torch.load('/mount/data/FBBEV/work_dirs/mappetrv3_noaug_8x8_36ep_102x102/iter_31644_ema.pth')\n\n\nkeys = list(model['state_dict'].keys())\n\nfor k in keys:\n    model['state_dict'][k.replace('pts_bbox_head', 'uni_perceive_head')] = model['state_dict'][k]\n\ntorch.save(model, '/mount/data/FBBEV/work_dirs/mappetrv3_noaug_8x8_36ep_102x102/iter_31644_ema2.pth')"
  },
  {
    "path": "tools/analysis_tools/occupancy_cbgs.py",
    "content": "import os \nimport os.path as osp\nimport sys\nimport mmcv\nimport numpy as np\nfrom collections import Counter, defaultdict\nfrom tqdm import tqdm\ntotal_counter = defaultdict(lambda: 0)\ninfo = mmcv.load('/mount/dnn_data/occupancy_2023/annotations.json')\np1 = '/mount/dnn_data/occupancy_2023/gts'\njson_map = {}\nscenes = os.listdir(p1)\nfor scene in tqdm(info['train_split']):\n    for sample in os.listdir(osp.join(p1, scene)):\n        data = np.load(osp.join(p1, scene, sample, 'labels.npz'))\n        occupancy = data['semantics']\n        visible_mask = data['mask_camera']\n        index = (visible_mask>0).nonzero()\n        seen = occupancy[index[0],index[1],index[2]]\n        counter = Counter(seen)\n        json_map[sample] = {}\n        for a,b in counter.items():\n            total_counter[int(a)]+=b\n            json_map[sample][int(a)] = b\nfrom IPython import embed\nembed()\nexit()\nnew_json_map = {}\n\nfor key in json_map.keys()\n    new_json_map[key] = {}\n    for k, v in json_map[key].items():\n        new_json_map[key][int(k)] = int(v)\n# for scene in scenes:\n"
  },
  {
    "path": "tools/analysis_tools/vis.py",
    "content": "# Copyright (c) Phigent Robotics. All rights reserved.\nimport argparse\nimport json\nimport os\nimport pickle\n\nimport cv2\nimport numpy as np\nfrom pyquaternion.quaternion import Quaternion\n\nfrom mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes as LB\n\n\ndef check_point_in_img(points, height, width):\n    valid = np.logical_and(points[:, 0] >= 0, points[:, 1] >= 0)\n    valid = np.logical_and(\n        valid, np.logical_and(points[:, 0] < width, points[:, 1] < height))\n    return valid\n\n\ndef depth2color(depth):\n    gray = max(0, min((depth + 2.5) / 3.0, 1.0))\n    max_lumi = 200\n    colors = np.array(\n        [[max_lumi, 0, max_lumi], [max_lumi, 0, 0], [max_lumi, max_lumi, 0],\n         [0, max_lumi, 0], [0, max_lumi, max_lumi], [0, 0, max_lumi]],\n        dtype=np.float32)\n    if gray == 1:\n        return tuple(colors[-1].tolist())\n    num_rank = len(colors) - 1\n    rank = np.floor(gray * num_rank).astype(np.int)\n    diff = (gray - rank / num_rank) * num_rank\n    return tuple(\n        (colors[rank] + (colors[rank + 1] - colors[rank]) * diff).tolist())\n\n\ndef lidar2img(points_lidar, camrera_info):\n    points_lidar_homogeneous = \\\n        np.concatenate([points_lidar,\n                        np.ones((points_lidar.shape[0], 1),\n                                dtype=points_lidar.dtype)], axis=1)\n    camera2lidar = np.eye(4, dtype=np.float32)\n    camera2lidar[:3, :3] = camrera_info['sensor2lidar_rotation']\n    camera2lidar[:3, 3] = camrera_info['sensor2lidar_translation']\n    lidar2camera = np.linalg.inv(camera2lidar)\n    points_camera_homogeneous = points_lidar_homogeneous @ lidar2camera.T\n    points_camera = points_camera_homogeneous[:, :3]\n    valid = np.ones((points_camera.shape[0]), dtype=bool)\n    valid = np.logical_and(points_camera[:, -1] > 0.5, valid)\n    points_camera = points_camera / points_camera[:, 2:3]\n    camera2img = camrera_info['cam_intrinsic']\n    points_img = points_camera @ camera2img.T\n    points_img = points_img[:, :2]\n    return points_img, valid\n\n\ndef get_lidar2global(infos):\n    lidar2ego = np.eye(4, dtype=np.float32)\n    lidar2ego[:3, :3] = Quaternion(infos['lidar2ego_rotation']).rotation_matrix\n    lidar2ego[:3, 3] = infos['lidar2ego_translation']\n    ego2global = np.eye(4, dtype=np.float32)\n    ego2global[:3, :3] = Quaternion(\n        infos['ego2global_rotation']).rotation_matrix\n    ego2global[:3, 3] = infos['ego2global_translation']\n    return ego2global @ lidar2ego\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Visualize the predicted '\n                                     'result of nuScenes')\n    parser.add_argument(\n        'res', help='Path to the predicted result in json format')\n    parser.add_argument(\n        '--show-range',\n        type=int,\n        default=50,\n        help='Range of visualization in BEV')\n    parser.add_argument(\n        '--canva-size', type=int, default=1000, help='Size of canva in pixel')\n    parser.add_argument(\n        '--vis-frames',\n        type=int,\n        default=500,\n        help='Number of frames for visualization')\n    parser.add_argument(\n        '--scale-factor',\n        type=int,\n        default=4,\n        help='Trade-off between image-view and bev in size of '\n        'the visualized canvas')\n    parser.add_argument(\n        '--vis-thred',\n        type=float,\n        default=0.3,\n        help='Threshold the predicted results')\n    parser.add_argument('--draw-gt', 
action='store_true')\n    parser.add_argument(\n        '--version',\n        type=str,\n        default='val',\n        help='Version of nuScenes dataset')\n    parser.add_argument(\n        '--root_path',\n        type=str,\n        default='./data/nuscenes',\n        help='Path to nuScenes dataset')\n    parser.add_argument(\n        '--save_path',\n        type=str,\n        default='./vis',\n        help='Path to save visualization results')\n    parser.add_argument(\n        '--format',\n        type=str,\n        default='video',\n        choices=['video', 'image'],\n        help='The desired format of the visualization result')\n    parser.add_argument(\n        '--fps', type=int, default=20, help='Frame rate of video')\n    parser.add_argument(\n        '--video-prefix', type=str, default='vis', help='name of video')\n    args = parser.parse_args()\n    return args\n\n\ncolor_map = {0: (255, 255, 0), 1: (0, 255, 255)}\n\n\ndef main():\n    args = parse_args()\n    # load predicted results\n    res = json.load(open(args.res, 'r'))\n    # load dataset information\n    info_path = \\\n        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version\n    dataset = pickle.load(open(info_path, 'rb'))\n    # prepare save path and medium\n    vis_dir = args.save_path\n    if not os.path.exists(vis_dir):\n        os.makedirs(vis_dir)\n    print('saving visualized result to %s' % vis_dir)\n    scale_factor = args.scale_factor\n    canva_size = args.canva_size\n    show_range = args.show_range\n    if args.format == 'video':\n        fourcc = cv2.VideoWriter_fourcc(*'MP4V')\n        vout = cv2.VideoWriter(\n            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,\n            args.fps, (int(1600 / scale_factor * 3),\n                       int(900 / scale_factor * 2 + canva_size)))\n\n    draw_boxes_indexes_bev = [(0, 1), (1, 2), (2, 3), (3, 0)]\n    draw_boxes_indexes_img_view = [(0, 1), (1, 2), (2, 3), (3, 0), (4, 5),\n                                   (5, 6), (6, 7), (7, 4), (0, 4), (1, 5),\n                                   (2, 6), (3, 7)]\n    views = [\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ]\n    print('start visualizing results')\n    for cnt, infos in enumerate(\n            dataset['infos'][:min(args.vis_frames, len(dataset['infos']))]):\n        if cnt % 10 == 0:\n            print('%d/%d' % (cnt, min(args.vis_frames, len(dataset['infos']))))\n        # collect instances\n        pred_res = res['results'][infos['token']]\n        pred_boxes = [\n            pred_res[rid]['translation'] + pred_res[rid]['size'] + [\n                Quaternion(pred_res[rid]['rotation']).yaw_pitch_roll[0] +\n                np.pi / 2\n            ] for rid in range(len(pred_res))\n        ]\n        if len(pred_boxes) == 0:\n            corners_lidar = np.zeros((0, 3), dtype=np.float32)\n        else:\n            pred_boxes = np.array(pred_boxes, dtype=np.float32)\n            boxes = LB(pred_boxes, origin=(0.5, 0.5, 0.0))\n            corners_global = boxes.corners.numpy().reshape(-1, 3)\n            corners_global = np.concatenate(\n                [corners_global,\n                 np.ones([corners_global.shape[0], 1])],\n                axis=1)\n            l2g = get_lidar2global(infos)\n            corners_lidar = corners_global @ np.linalg.inv(l2g).T\n            corners_lidar = corners_lidar[:, :3]\n        pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=np.bool)\n        
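# gather detection scores; ground-truth boxes appended below are given score 0\n        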
scores = [\n            pred_res[rid]['detection_score'] for rid in range(len(pred_res))\n        ]\n        if args.draw_gt:\n            gt_boxes = infos['gt_boxes']\n            gt_boxes[:, -1] = gt_boxes[:, -1] + np.pi / 2\n            width = gt_boxes[:, 4].copy()\n            gt_boxes[:, 4] = gt_boxes[:, 3]\n            gt_boxes[:, 3] = width\n            corners_lidar_gt = \\\n                LB(infos['gt_boxes'],\n                   origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3)\n            corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt],\n                                           axis=0)\n            gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=np.bool)\n            pred_flag = np.concatenate(\n                [pred_flag, np.logical_not(gt_flag)], axis=0)\n            scores = scores + [0 for _ in range(infos['gt_boxes'].shape[0])]\n        scores = np.array(scores, dtype=np.float32)\n        sort_ids = np.argsort(scores)\n\n        # image view\n        imgs = []\n        for view in views:\n            img = cv2.imread(infos['cams'][view]['data_path'])\n            # draw instances\n            corners_img, valid = lidar2img(corners_lidar, infos['cams'][view])\n            valid = np.logical_and(\n                valid,\n                check_point_in_img(corners_img, img.shape[0], img.shape[1]))\n            valid = valid.reshape(-1, 8)\n            corners_img = corners_img.reshape(-1, 8, 2).astype(np.int)\n            for aid in range(valid.shape[0]):\n                for index in draw_boxes_indexes_img_view:\n                    if valid[aid, index[0]] and valid[aid, index[1]]:\n                        cv2.line(\n                            img,\n                            corners_img[aid, index[0]],\n                            corners_img[aid, index[1]],\n                            color=color_map[int(pred_flag[aid])],\n                            thickness=scale_factor)\n            imgs.append(img)\n\n        # bird-eye-view\n        canvas = np.zeros((int(canva_size), int(canva_size), 3),\n                          dtype=np.uint8)\n        # draw lidar points\n        lidar_points = np.fromfile(infos['lidar_path'], dtype=np.float32)\n        lidar_points = lidar_points.reshape(-1, 5)[:, :3]\n        lidar_points[:, 1] = -lidar_points[:, 1]\n        lidar_points[:, :2] = \\\n            (lidar_points[:, :2] + show_range) / show_range / 2.0 * canva_size\n        for p in lidar_points:\n            if check_point_in_img(\n                    p.reshape(1, 3), canvas.shape[1], canvas.shape[0])[0]:\n                color = depth2color(p[2])\n                cv2.circle(\n                    canvas, (int(p[0]), int(p[1])),\n                    radius=0,\n                    color=color,\n                    thickness=1)\n\n        # draw instances\n        corners_lidar = corners_lidar.reshape(-1, 8, 3)\n        corners_lidar[:, :, 1] = -corners_lidar[:, :, 1]\n        bottom_corners_bev = corners_lidar[:, [0, 3, 7, 4], :2]\n        bottom_corners_bev = \\\n            (bottom_corners_bev + show_range) / show_range / 2.0 * canva_size\n        bottom_corners_bev = np.round(bottom_corners_bev).astype(np.int32)\n        center_bev = corners_lidar[:, [0, 3, 7, 4], :2].mean(axis=1)\n        head_bev = corners_lidar[:, [0, 4], :2].mean(axis=1)\n        canter_canvas = \\\n            (center_bev + show_range) / show_range / 2.0 * canva_size\n        center_canvas = canter_canvas.astype(np.int32)\n        head_canvas = (head_bev + show_range) 
/ show_range / 2.0 * canva_size\n        head_canvas = head_canvas.astype(np.int32)\n\n        for rid in sort_ids:\n            score = scores[rid]\n            if score < args.vis_thred and pred_flag[rid]:\n                continue\n            score = min(score * 2.0, 1.0) if pred_flag[rid] else 1.0\n            color = color_map[int(pred_flag[rid])]\n            for index in draw_boxes_indexes_bev:\n                cv2.line(\n                    canvas,\n                    bottom_corners_bev[rid, index[0]],\n                    bottom_corners_bev[rid, index[1]],\n                    [color[0] * score, color[1] * score, color[2] * score],\n                    thickness=1)\n            cv2.line(\n                canvas,\n                center_canvas[rid],\n                head_canvas[rid],\n                [color[0] * score, color[1] * score, color[2] * score],\n                1,\n                lineType=8)\n\n        # fuse image-view and bev\n        img = np.zeros((900 * 2 + canva_size * scale_factor, 1600 * 3, 3),\n                       dtype=np.uint8)\n        img[:900, :, :] = np.concatenate(imgs[:3], axis=1)\n        img_back = np.concatenate(\n            [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :], imgs[5][:, ::-1, :]],\n            axis=1)\n        img[900 + canva_size * scale_factor:, :, :] = img_back\n        img = cv2.resize(img, (int(1600 / scale_factor * 3),\n                               int(900 / scale_factor * 2 + canva_size)))\n        w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)\n        img[int(900 / scale_factor):int(900 / scale_factor) + canva_size,\n            w_begin:w_begin + canva_size, :] = canvas\n\n        if args.format == 'image':\n            cv2.imwrite(os.path.join(vis_dir, '%s.jpg' % infos['token']), img)\n        elif args.format == 'video':\n            vout.write(img)\n    if args.format == 'video':\n        vout.release()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/vis_occupancy.py",
    "content": "# pythonw vis_fru.py\n# from operator import gt\nimport pickle\nimport numpy as np\n# from omegaconf import DictConfig\nfrom mayavi import mlab\nfrom collections import Counter\n# path = r'n008-2018-08-28-16-16-48-0400__LIDAR_TOP__1535488206297315.pcd.bin'\n# points = np.fromfile(path, dtype=np.float16).reshape(-1, 5)\n# print(points.shape)\nimport argparse\npoint_cloud_range = [-50, -50, -2, 50, 50, 5]\nvoxel_size=[0.2, 0.2, 0.2]\nvoxel_shape=(int((point_cloud_range[3]-point_cloud_range[0])/voxel_size[0]),\n             int((point_cloud_range[4]-point_cloud_range[1])/voxel_size[1]),\n             int((point_cloud_range[5]-point_cloud_range[2])/voxel_size[2]))\nmap_label = {0: 0,\n                1: 1,\n                2: 1,\n                3: 1,\n                4: 1,\n                5: 1,\n                6: 1,\n                7: 1,\n                8: 1,\n                9: 2,\n                10: 2,\n                11: 2,\n                12: 2,\n                13: 2,\n                14: 3,\n                15: 3,\n                16: 3,\n                17: 3,\n                18: 3,\n                19: 3,\n                20: 3,\n                21: 3,\n                22: 3,\n                23: 3,\n                24: 4,\n                25: 4,\n                26: 4,\n                27: 4,\n                28: 4,\n                29: 4,\n                30: 4,\n                31: 3}\ndef remove_far(points, point_cloud_range):\n    mask = (points[:, 0]>point_cloud_range[0]) & (points[:, 0]<point_cloud_range[3]) & (points[:, 1]>point_cloud_range[1]) & (points[:, 1]<point_cloud_range[4]) \\\n            & (points[:, 2]>point_cloud_range[2]) & (points[:, 2]<point_cloud_range[5])\n    return points[mask, :]\n\ndef voxelize(voxel: np.array, label_count: np.array):\n    '''\n    '''\n    for x in range(voxel.shape[0]):\n        for y in range(voxel.shape[1]):\n            for z in range(voxel.shape[2]):\n                if label_count[x, y, z] == 0:\n                    continue\n                labels = voxel[x, y, z]\n                if np.unique(labels).shape[0] == 0:\n                    # import ipdb; ipdb.set_trace()\n                    assert False\n                    continue\n                # import ipdb\n                # ipdb.set_trace()\n                # print(np.argmax(np.bincount(labels[labels!=0])))\n                try:\n                    label_count[x, y, z] = np.argmax(np.bincount(labels[labels!=0]))\n                except:\n                    print(labels)\n    return label_count\n\ndef points2voxel(points, voxel_shape, voxel_size, max_points=5, specific_category=None):\n    voxel = np.zeros((*voxel_shape, max_points), dtype=np.int64)\n    label_count = np.zeros((voxel_shape), dtype=np.int64)\n    index = points[:, 4].argsort()\n    points = points[index]\n    for point in points:\n      \n        x, y, z = point[0], point[1], point[2]\n        x = round((x - point_cloud_range[0]) / voxel_size[0])\n        y = round((y - point_cloud_range[1]) / voxel_size[1])\n        z = round((z - point_cloud_range[2]) / voxel_size[2])\n        if point[4] == 31:\n            continue\n        if specific_category and int(point[4]) not in  specific_category:\n            continue\n        try:\n            voxel[x, y, z, label_count[x, y, z]] = int(point[4])  # map_label[int(point[4])]\n            label_count[x, y, z] += 1\n        except:\n            # import ipdb\n            # ipdb.set_trace()\n            continue\n\n    voxel = voxelize(voxel, 
label_count)\n    label_count[label_count==max_points] = 0\n    voxel = voxel.astype(np.float64)\n    # from IPython import embed\n    # embed()\n    # exit()\n    return voxel\n\n\n\n# voxel = points2voxel(points, voxel_shape, voxel_size, 100)\ndef get_grid_coords(dims, resolution):\n    \"\"\"\n    :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])\n    :return coords_grid: is the center coords of voxels in the grid\n    \"\"\"\n\n    g_xx = np.arange(0, dims[0] + 1)\n    g_yy = np.arange(0, dims[1] + 1)\n    g_zz = np.arange(0, dims[2] + 1)\n\n    # Obtaining the grid with coords...\n    xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1])\n    coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T\n    coords_grid = coords_grid.astype(np.float32)\n\n    coords_grid = (coords_grid * resolution) + resolution / 2\n\n    temp = np.copy(coords_grid)\n    temp[:, 0] = coords_grid[:, 1]\n    temp[:, 1] = coords_grid[:, 0]\n    coords_grid = np.copy(temp)\n\n    return coords_grid\n\n\ndef draw(\n    voxels,\n    T_velo_2_cam,\n    vox_origin,\n    fov_mask,\n    img_size,\n    f,\n    voxel_size=0.2,\n    d=7,  # 7m - determine the size of the mesh representing the camera\n):\n    # Compute the coordinates of the mesh representing camera\n    x = d * img_size[0] / (2 * f)\n    y = d * img_size[1] / (2 * f)\n    tri_points = np.array(\n        [\n            [0, 0, 0],\n            [x, y, d],\n            [-x, y, d],\n            [-x, -y, d],\n            [x, -y, d],\n        ]\n    )\n    # tri_points = np.hstack([tri_points, np.ones((5, 1))])\n    # tri_points = (np.linalg.inv(T_velo_2_cam) @ tri_points.T).T\n    x = tri_points[:, 0]\n    y = tri_points[:, 1]\n    z = tri_points[:, 2]\n    triangles = [\n        (0, 1, 2),\n        (0, 1, 4),\n        (0, 3, 4),\n        (0, 2, 3),\n    ]\n\n    # Compute the voxels coordinates\n    grid_coords = get_grid_coords(\n        [voxels.shape[0], voxels.shape[1], voxels.shape[2]], voxel_size\n    )\n\n    # Attach the predicted class to every voxel\n    grid_coords = np.vstack([grid_coords.T, voxels.reshape(-1)]).T\n\n    # Get the voxels inside FOV\n    fov_grid_coords = grid_coords\n\n    # # Get the voxels outside FOV\n    # outfov_grid_coords = grid_coords[~fov_mask, :]\n\n    # Remove empty and unknown voxels\n    fov_voxels = fov_grid_coords[\n        (fov_grid_coords[:, 3] > 0) & (fov_grid_coords[:, 3] < 255)\n    ]\n    # outfov_voxels = outfov_grid_coords[\n    #     (outfov_grid_coords[:, 3] > 0) & (outfov_grid_coords[:, 3] < 255)\n    # ]\n\n    figure = mlab.figure(size=(1400, 1400), bgcolor=(1, 1, 1))\n\n    # Draw the camera\n    # mlab.triangular_mesh(\n    #     x, y, z, triangles, representation=\"wireframe\", color=(0, 0, 0), line_width=5\n    # )\n\n    \n    # counter = Counter(list(fov_voxels[:,3].reshape(-1)))\n    # for key in counter:\n    #     if counter[key] < 100:\n    #         index = fov_voxels[:,3] != key\n    #         fov_voxels = fov_voxels[index]\n    # Draw occupied inside FOV voxels\n    plt_plot_fov = mlab.points3d(\n        fov_voxels[:, 0],\n        fov_voxels[:, 1],\n        fov_voxels[:, 2],\n        fov_voxels[:, 3],\n        colormap=\"viridis\",\n        scale_factor=voxel_size - 0.05 * voxel_size,\n        mode=\"cube\",\n        opacity=1.0,\n        vmin=1,\n        vmax=19,\n    )\n\n    # Draw occupied outside FOV voxels\n    # plt_plot_outfov = mlab.points3d(\n    #     outfov_voxels[:, 0],\n    #     outfov_voxels[:, 1],\n    #     outfov_voxels[:, 2],\n    
#     outfov_voxels[:, 3],\n    #     colormap=\"viridis\",\n    #     scale_factor=voxel_size - 0.05 * voxel_size,\n    #     mode=\"cube\",\n    #     opacity=1.0,\n    #     vmin=1,\n    #     vmax=19,\n    # )\n\n    classname_to_color = {  # RGB.\n        \"noise\": (0, 0, 0),  # Black.\n        \"animal\": (70, 130, 180),  # Steelblue\n        \"human.pedestrian.adult\": (0, 0, 230),  # Blue\n        \"human.pedestrian.child\":(0, 0, 230),  # Skyblue,\n        \"human.pedestrian.construction_worker\":(0, 0, 230),  # Cornflowerblue\n        \"human.pedestrian.personal_mobility\": (0, 0, 230),  # Palevioletred\n        \"human.pedestrian.police_officer\":(0, 0, 230),  # Navy,\n        \"human.pedestrian.stroller\": (0, 0, 230),  # Lightcoral\n        \"human.pedestrian.wheelchair\": (0, 0, 230),  # Blueviolet\n        \"movable_object.barrier\": (112, 128, 144),  # Slategrey\n        \"movable_object.debris\": (112, 128, 144),  # Chocolate\n        \"movable_object.pushable_pullable\":(112, 128, 144),  # Dimgrey\n        \"movable_object.trafficcone\":(112, 128, 144),  # Darkslategrey\n        \"static_object.bicycle_rack\": (188, 143, 143),  # Rosybrown\n        \"vehicle.bicycle\": (220, 20, 60),  # Crimson\n        \"vehicle.bus.bendy\":(255, 158, 0),  # Coral\n        \"vehicle.bus.rigid\": (255, 158, 0),  # Orangered\n        \"vehicle.car\": (255, 158, 0),  # Orange\n        \"vehicle.construction\":(255, 158, 0),  # Darksalmon\n        \"vehicle.emergency.ambulance\":(255, 158, 0),\n        \"vehicle.emergency.police\": (255, 158, 0),  # Gold\n        \"vehicle.motorcycle\": (255, 158, 0),  # Red\n        \"vehicle.trailer\":(255, 158, 0),  # Darkorange\n        \"vehicle.truck\": (255, 158, 0),  # Tomato\n        \"flat.driveable_surface\": (0, 207, 191),  # nuTonomy green\n        \"flat.other\":(0, 207, 191),\n        \"flat.sidewalk\": (75, 0, 75),\n        \"flat.terrain\": (0, 207, 191),\n        \"static.manmade\": (222, 184, 135),  # Burlywood\n        \"static.other\": (0, 207, 191),  # Bisque\n        \"static.vegetation\": (0, 175, 0),  # Green\n        \"vehicle.ego\": (255, 240, 245)\n    }\n    \n    classname_to_color= {'ignore_class': (0, 0, 0),  # Black.\n                'barrier': (112, 128, 144),  # Slategrey\n                'bicycle': (220, 20, 60),  # Crimson\n                'bus': (255, 127, 80),  # Coral\n                'car': (255, 158, 0),  # Orange\n                'construction_vehicle': (233, 150, 70),  # Darksalmon\n                'motorcycle': (255, 61, 99),  # Red\n                'pedestrian': (0, 0, 230),  # Blue\n                'traffic_cone': (47, 79, 79),  # Darkslategrey\n                'trailer': (255, 140, 0),  # Darkorange\n                'truck': (255, 99, 71),  # Tomato\n                'driveable_surface': (0, 207, 191),  # nuTonomy green\n                'other_flat': (175, 0, 75),\n                'sidewalk': (75, 0, 75),\n                'terrain': (112, 180, 60),\n                'manmade': (222, 184, 135),  # Burlywood\n                'vegetation': (0, 175, 0)}\n    colors = np.array(list(classname_to_color.values())).astype(np.uint8)\n    alpha = np.ones((colors.shape[0], 1), dtype=np.uint8) * 255\n    colors = np.hstack([colors, alpha])\n\n\n\n    plt_plot_fov.glyph.scale_mode = \"scale_by_vector\"\n    # plt_plot_outfov.glyph.scale_mode = \"scale_by_vector\"\n\n    plt_plot_fov.module_manager.scalar_lut_manager.lut.table = colors\n    plt_plot_fov.module_manager.scalar_lut_manager.data_range = [0, 17]\n\n    
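# Optional (not part of the original script): to render headlessly, a mayavi figure can be written to disk with mlab.savefig('occ_vis.png') before the interactive mlab.show() call below; the filename here is only an example.\n    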
mlab.show()\n\ndef voxel_exist(voxels, x,y,z):\n    if x < 0 or y < 0 or z < 0 or x >= voxels.shape[0] or y >= voxels.shape[1] or z >= voxels.shape[2]:\n        return False\n    else:\n        return voxels[x,y,z]\n\ndef max_connected(voxels, distance=3):\n    \"\"\" Keep the max connected component of the voxels (a boolean matrix). \n    distance is the distance considered as neighbors, i.e. if distance = 2, \n    then two blocks are considered connected even with a hole in between\"\"\"\n    assert(distance > 0)\n    component_list = []\n    # max_component = np.zeros(voxels.shape)\n    voxels_copy = np.copy(voxels)\n    for startx in range(voxels.shape[0]):\n        for starty in range(voxels.shape[1]):\n            for startz in range(voxels.shape[2]):\n                if not voxels_copy[startx,starty,startz]:\n                    continue\n                # start a new component\n                component = np.zeros(voxels.shape, dtype=bool)\n                stack = [[startx,starty,startz]]\n                component[startx,starty,startz] = True\n                voxels_copy[startx,starty,startz] = False\n                while len(stack) > 0:\n                    x,y,z = stack.pop()\n                    category = voxels[x,y,z]\n                    for i in range(x-distance, x+distance + 1):\n                        for j in range(y-distance, y+distance + 1):\n                            for k in range(z-distance, z+distance + 1):\n                                if (i-x)**2+(j-y)**2+(k-z)**2 > distance * distance:\n                                    continue\n                                category = voxels[x,y,z]\n                                if voxel_exist(voxels_copy, i,j,k) and voxels[i,j,k] == category:\n                                    voxels_copy[i,j,k] = False\n                                    component[i,j,k] = True\n                                    stack.append([i,j,k])\n                component_list.append(component)\n                # if component.sum() > max_component.sum():\n                #     max_component = component\n                    \n\n    max_component = np.zeros(voxels.shape,  dtype=bool)\n    for each in component_list:\n        if each.sum()>10:\n            max_component |= each\n    return max_component \n\n# points = remove_far(points, point_cloud_range)\ndef main(filepath='*.npz'):\n\n    vox_origin = np.array([0, 0, -2])\n\n\n    # y_pred = points2voxel(points, voxel_shape, voxel_size, 20)\n    # y_del = ~max_connected(y_pred)\n    # y_pred[y_del] = 0\n   \n    if filepath.endswith('npy'):\n        y_pred = np.load(filepath)\n    elif filepath.endswith('npz'):\n        y_pred = np.load(filepath)['pred']# ['semantics']\n\n    # y_pred: shape 200x200x16\n    draw(\n        y_pred,\n        None,\n        vox_origin,\n        None,\n        voxel_size=0.2,\n        f=552.55426,\n        img_size=(1600, 900),\n        d=7,\n    )\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description='vis occ')\n    parser.add_argument('path', help='path to npz')\n    args = parser.parse_args()\n    main(args.path)"
  },
  {
    "path": "tools/create_data.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nfrom os import path as osp\n\nfrom tools.data_converter import indoor_converter as indoor\nfrom tools.data_converter import kitti_converter as kitti\nfrom tools.data_converter import lyft_converter as lyft_converter\nfrom tools.data_converter import nuscenes_converter as nuscenes_converter\nfrom tools.data_converter.create_gt_database import (\n    GTDatabaseCreater, create_groundtruth_database)\n\n\ndef kitti_data_prep(root_path,\n                    info_prefix,\n                    version,\n                    out_dir,\n                    with_plane=False):\n    \"\"\"Prepare data related to Kitti dataset.\n\n    Related data consists of '.pkl' files recording basic infos,\n    2D annotations and groundtruth database.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        out_dir (str): Output directory of the groundtruth database info.\n        with_plane (bool, optional): Whether to use plane information.\n            Default: False.\n    \"\"\"\n    kitti.create_kitti_info_file(root_path, info_prefix, with_plane)\n    kitti.create_reduced_point_cloud(root_path, info_prefix)\n\n    info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl')\n    info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl')\n    info_trainval_path = osp.join(root_path,\n                                  f'{info_prefix}_infos_trainval.pkl')\n    info_test_path = osp.join(root_path, f'{info_prefix}_infos_test.pkl')\n    kitti.export_2d_annotation(root_path, info_train_path)\n    kitti.export_2d_annotation(root_path, info_val_path)\n    kitti.export_2d_annotation(root_path, info_trainval_path)\n    kitti.export_2d_annotation(root_path, info_test_path)\n\n    create_groundtruth_database(\n        'KittiDataset',\n        root_path,\n        info_prefix,\n        f'{out_dir}/{info_prefix}_infos_train.pkl',\n        relative_path=False,\n        mask_anno_path='instances_train.json',\n        with_mask=(version == 'mask'))\n\n\ndef nuscenes_data_prep(root_path,\n                       info_prefix,\n                       version,\n                       dataset_name,\n                       out_dir,\n                       max_sweeps=10):\n    \"\"\"Prepare data related to nuScenes dataset.\n\n    Related data consists of '.pkl' files recording basic infos,\n    2D annotations and groundtruth database.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        dataset_name (str): The dataset class name.\n        out_dir (str): Output directory of the groundtruth database info.\n        max_sweeps (int, optional): Number of input consecutive frames.\n            Default: 10\n    \"\"\"\n    nuscenes_converter.create_nuscenes_infos(\n        root_path, info_prefix, version=version, max_sweeps=max_sweeps)\n\n    if version == 'v1.0-test':\n        info_test_path = osp.join(root_path, f'{info_prefix}_infos_test.pkl')\n        nuscenes_converter.export_2d_annotation(\n            root_path, info_test_path, version=version)\n        return\n\n    info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl')\n    info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl')\n    nuscenes_converter.export_2d_annotation(\n        root_path, info_train_path, version=version)\n    
nuscenes_converter.export_2d_annotation(\n        root_path, info_val_path, version=version)\n    create_groundtruth_database(dataset_name, root_path, info_prefix,\n                                f'{out_dir}/{info_prefix}_infos_train.pkl')\n\n\ndef lyft_data_prep(root_path, info_prefix, version, max_sweeps=10):\n    \"\"\"Prepare data related to Lyft dataset.\n\n    Related data consists of '.pkl' files recording basic infos.\n    Although the ground truth database and 2D annotations are not used in\n    Lyft, it can also be generated like nuScenes.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        max_sweeps (int, optional): Number of input consecutive frames.\n            Defaults to 10.\n    \"\"\"\n    lyft_converter.create_lyft_infos(\n        root_path, info_prefix, version=version, max_sweeps=max_sweeps)\n\n\ndef scannet_data_prep(root_path, info_prefix, out_dir, workers):\n    \"\"\"Prepare the info file for scannet dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info file.\n        workers (int): Number of threads to be used.\n    \"\"\"\n    indoor.create_indoor_info_file(\n        root_path, info_prefix, out_dir, workers=workers)\n\n\ndef s3dis_data_prep(root_path, info_prefix, out_dir, workers):\n    \"\"\"Prepare the info file for s3dis dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info file.\n        workers (int): Number of threads to be used.\n    \"\"\"\n    indoor.create_indoor_info_file(\n        root_path, info_prefix, out_dir, workers=workers)\n\n\ndef sunrgbd_data_prep(root_path, info_prefix, out_dir, workers, num_points):\n    \"\"\"Prepare the info file for sunrgbd dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info file.\n        workers (int): Number of threads to be used.\n    \"\"\"\n    indoor.create_indoor_info_file(\n        root_path,\n        info_prefix,\n        out_dir,\n        workers=workers,\n        num_points=num_points)\n\n\ndef waymo_data_prep(root_path,\n                    info_prefix,\n                    version,\n                    out_dir,\n                    workers,\n                    max_sweeps=5):\n    \"\"\"Prepare the info file for waymo dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info file.\n        workers (int): Number of threads to be used.\n        max_sweeps (int, optional): Number of input consecutive frames.\n            Default: 5. 
Here we store pose information of these frames\n            for later use.\n    \"\"\"\n    from tools.data_converter import waymo_converter as waymo\n\n    splits = ['training', 'validation', 'testing']\n    for i, split in enumerate(splits):\n        load_dir = osp.join(root_path, 'waymo_format', split)\n        if split == 'validation':\n            save_dir = osp.join(out_dir, 'kitti_format', 'training')\n        else:\n            save_dir = osp.join(out_dir, 'kitti_format', split)\n        converter = waymo.Waymo2KITTI(\n            load_dir,\n            save_dir,\n            prefix=str(i),\n            workers=workers,\n            test_mode=(split == 'testing'))\n        converter.convert()\n    # Generate waymo infos\n    out_dir = osp.join(out_dir, 'kitti_format')\n    kitti.create_waymo_info_file(\n        out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers)\n    GTDatabaseCreater(\n        'WaymoDataset',\n        out_dir,\n        info_prefix,\n        f'{out_dir}/{info_prefix}_infos_train.pkl',\n        relative_path=False,\n        with_mask=False,\n        num_worker=workers).create()\n\n\nparser = argparse.ArgumentParser(description='Data converter arg parser')\nparser.add_argument('dataset', metavar='kitti', help='name of the dataset')\nparser.add_argument(\n    '--root-path',\n    type=str,\n    default='./data/kitti',\n    help='specify the root path of dataset')\nparser.add_argument(\n    '--version',\n    type=str,\n    default='v1.0',\n    required=False,\n    help='specify the dataset version, no need for kitti')\nparser.add_argument(\n    '--max-sweeps',\n    type=int,\n    default=10,\n    required=False,\n    help='specify sweeps of lidar per example')\nparser.add_argument(\n    '--with-plane',\n    action='store_true',\n    help='Whether to use plane information for kitti.')\nparser.add_argument(\n    '--num-points',\n    type=int,\n    default=-1,\n    help='Number of points to sample for indoor datasets.')\nparser.add_argument(\n    '--out-dir',\n    type=str,\n    default='./data/kitti',\n    required=False,\n    help='name of info pkl')\nparser.add_argument('--extra-tag', type=str, default='kitti')\nparser.add_argument(\n    '--workers', type=int, default=4, help='number of threads to be used')\nargs = parser.parse_args()\n\nif __name__ == '__main__':\n    if args.dataset == 'kitti':\n        kitti_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=args.version,\n            out_dir=args.out_dir,\n            with_plane=args.with_plane)\n    elif args.dataset == 'nuscenes' and args.version != 'v1.0-mini':\n        train_version = f'{args.version}-trainval'\n        nuscenes_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=train_version,\n            dataset_name='NuScenesDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n        test_version = f'{args.version}-test'\n        nuscenes_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=test_version,\n            dataset_name='NuScenesDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini':\n        train_version = f'{args.version}'\n        nuscenes_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=train_version,\n           
 dataset_name='NuScenesDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'lyft':\n        train_version = f'{args.version}-train'\n        lyft_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=train_version,\n            max_sweeps=args.max_sweeps)\n        test_version = f'{args.version}-test'\n        lyft_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=test_version,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'waymo':\n        waymo_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=args.version,\n            out_dir=args.out_dir,\n            workers=args.workers,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'scannet':\n        scannet_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            out_dir=args.out_dir,\n            workers=args.workers)\n    elif args.dataset == 's3dis':\n        s3dis_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            out_dir=args.out_dir,\n            workers=args.workers)\n    elif args.dataset == 'sunrgbd':\n        sunrgbd_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            num_points=args.num_points,\n            out_dir=args.out_dir,\n            workers=args.workers)\n"
  },
  {
    "path": "tools/create_data.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\nexport PYTHONPATH=`pwd`:$PYTHONPATH\n\nPARTITION=$1\nJOB_NAME=$2\nDATASET=$3\nGPUS=${GPUS:-1}\nGPUS_PER_NODE=${GPUS_PER_NODE:-1}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nJOB_NAME=create_data\n\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/create_data.py ${DATASET} \\\n            --root-path ./data/${DATASET} \\\n            --out-dir ./data/${DATASET} \\\n            --extra-tag ${DATASET}\n"
  },
  {
    "path": "tools/create_data_bev_planner.py",
    "content": "# Copyright (c) 2023-2024, NVIDIA Corporation & Affiliates. All rights reserved. \n# \n# This work is made available under the Nvidia Source Code License-NC. \n# To view a copy of this license, visit \n# TODO: add license here\n\nimport pickle\n\nimport numpy as np\nfrom nuscenes import NuScenes\nfrom nuscenes.utils.data_classes import Box\nfrom pyquaternion import Quaternion\n\nfrom tools.data_converter import nuscenes_converter as nuscenes_converter\n# from tools.data_converter.nuscenes_prediction_tools import  get_forecasting_annotations\nmap_name_from_general_to_detection = {\n    'human.pedestrian.adult': 'pedestrian',\n    'human.pedestrian.child': 'pedestrian',\n    'human.pedestrian.wheelchair': 'ignore',\n    'human.pedestrian.stroller': 'ignore',\n    'human.pedestrian.personal_mobility': 'ignore',\n    'human.pedestrian.police_officer': 'pedestrian',\n    'human.pedestrian.construction_worker': 'pedestrian',\n    'animal': 'ignore',\n    'vehicle.car': 'car',\n    'vehicle.motorcycle': 'motorcycle',\n    'vehicle.bicycle': 'bicycle',\n    'vehicle.bus.bendy': 'bus',\n    'vehicle.bus.rigid': 'bus',\n    'vehicle.truck': 'truck',\n    'vehicle.construction': 'construction_vehicle',\n    'vehicle.emergency.ambulance': 'ignore',\n    'vehicle.emergency.police': 'ignore',\n    'vehicle.trailer': 'trailer',\n    'movable_object.barrier': 'barrier',\n    'movable_object.trafficcone': 'traffic_cone',\n    'movable_object.pushable_pullable': 'ignore',\n    'movable_object.debris': 'ignore',\n    'static_object.bicycle_rack': 'ignore',\n}\nclasses = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\n\nVERSION= 'v1.0-mini'\nNUSCENES = 'nuscenes-mini'\n# VERSION= 'v1.0-trainval'\n# NUSCENES = 'nuscenes'\ndef get_gt(info, traj_in_lidar_coor=None, traj_mask_in_lidar_coor=None):\n    \"\"\"Generate gt labels from info.\n\n    Args:\n        info(dict): Infos needed to generate gt labels.\n\n    Returns:\n        Tensor: GT bboxes.\n        Tensor: GT labels.\n    \"\"\"\n\n    ego2global_rotation = info['cams']['CAM_FRONT']['ego2global_rotation']\n    ego2global_translation = info['cams']['CAM_FRONT'][\n        'ego2global_translation']\n    trans = -np.array(ego2global_translation)\n    rot = Quaternion(ego2global_rotation).inverse\n    gt_boxes = list()\n    gt_boxes_in_global = list()\n    gt_labels = list()\n    fut_traj = list()\n    fut_traj_mask = list()\n    valid_flag = list()\n    for i, ann_info in enumerate(info['ann_infos']):\n        # Use ego coordinate.\n        if (map_name_from_general_to_detection[ann_info['category_name']]\n                not in classes\n                or ann_info['num_lidar_pts'] + ann_info['num_radar_pts'] <= 0):\n            valid_flag.append(False)\n            continue\n        valid_flag.append(True)\n        box = Box(\n            ann_info['translation'],\n            ann_info['size'],\n            Quaternion(ann_info['rotation']),\n            velocity=ann_info['velocity'],\n        )\n        box_xyz_in_global = np.array(box.center)\n        box_dxdydz_in_global = np.array(box.wlh)[[1, 0, 2]]\n        box_yaw_in_global = np.array([box.orientation.yaw_pitch_roll[0]])\n        box_velo_in_global = np.array(box.velocity[:2])\n\n        box.translate(trans)\n        box.rotate(rot)\n        box_xyz = np.array(box.center)\n        box_dxdydz = np.array(box.wlh)[[1, 0, 2]]\n        box_yaw = np.array([box.orientation.yaw_pitch_roll[0]])\n        
box_velo = np.array(box.velocity[:2])\n        gt_box = np.concatenate([box_xyz, box_dxdydz, box_yaw, box_velo])\n        gt_box_in_global = np.concatenate([box_xyz_in_global, box_dxdydz_in_global, box_yaw_in_global, box_velo_in_global])\n        gt_boxes.append(gt_box)\n        gt_boxes_in_global.append(gt_box_in_global)\n        gt_labels.append(\n            classes.index(\n                map_name_from_general_to_detection[ann_info['category_name']]))\n\n\n        if traj_in_lidar_coor is not None:\n            # traj = np.dot(Quaternion(info['lidar2ego_rotation']).rotation_matrix[:2,:2],traj_in_lidar_coor[i].transpose(1,0)).transpose(1,0)\n            fut_traj.append(traj_in_lidar_coor[i])\n            fut_traj_mask.append(traj_mask_in_lidar_coor[i])\n\n\n    return gt_boxes, gt_labels, fut_traj, fut_traj_mask, np.array(valid_flag), gt_boxes_in_global\n\ndef nuscenes_data_prep(root_path, info_prefix, version, max_sweeps=10):\n    \"\"\"Prepare data related to nuScenes dataset.\n\n    Related data consists of '.pkl' files recording basic infos,\n    2D annotations and groundtruth database.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        max_sweeps (int, optional): Number of input consecutive frames.\n            Default: 10\n    \"\"\"\n    nuscenes_converter.create_nuscenes_infos(\n        root_path, info_prefix, version=version, max_sweeps=max_sweeps)\n\n\n\ndef add_ann_adj_info(extra_tag, with_lidar_seg=False):\n    nuscenes_version = VERSION\n    dataroot = f'./data/{NUSCENES}/'\n    nuscenes = NuScenes(nuscenes_version, dataroot)\n    # for set in ['test']:\n    #     dataset = pickle.load(\n    #         open('./data/%s/%s_infos_%s.pkl' % (NUSCENES, extra_tag, set), 'rb'))\n    #     for id in range(len(dataset['infos'])):\n    #         if id % 10 == 0:\n    #             print('%d/%d' % (id, len(dataset['infos'])))\n    #         info = dataset['infos'][id]\n    #         # get sweep adjacent frame info\n    #         sample = nuscenes.get('sample', info['token'])\n    #         ann_infos = list()\n    #         for ann in sample['anns']:\n    #             ann_info = nuscenes.get('sample_annotation', ann)\n    #             velocity = nuscenes.box_velocity(ann_info['token'])\n    #             if np.any(np.isnan(velocity)):\n    #                 velocity = np.zeros(3)\n    #             ann_info['velocity'] = velocity\n    #             ann_infos.append(ann_info)\n    #         dataset['infos'][id]['ann_infos'] = ann_infos\n    #         dataset['infos'][id]['ann_infos'] = get_gt(dataset['infos'][id])\n    #         dataset['infos'][id]['scene_token'] = sample['scene_token']\n    #         scene = nuscenes.get('scene',  sample['scene_token'])\n    #         dataset['infos'][id]['scene_name'] = scene['name']\n    #         dataset['infos'][id]['prev'] = sample['prev']\n    #         # description = scene['description']\n    #         if with_lidar_seg:\n    #             lidar_sd_token = sample['data']['LIDAR_TOP']\n    #             dataset['infos'][id]['lidarseg_filename'] =  nuscenes.get('lidarseg', lidar_sd_token)['filename']\n\n\n    #         scene = nuscenes.get('scene', sample['scene_token'])\n    #         dataset['infos'][id]['occ_path'] = \\\n    #             './data/nuscenes/gts/%s/%s'%(scene['name'], info['token'])\n    #     with open('./data/%s/%s_infos_%s.pkl' % (NUSCENES, extra_tag, set),\n    #               'wb') as fid:\n    #         
pickle.dump(dataset, fid)\n\n    for set in ['train', 'val']:\n        dataset = pickle.load(\n            open('./data/%s/%s_infos_%s.pkl' % (NUSCENES, extra_tag, set), 'rb'))\n        # traj_data =  pickle.load(open(f'/mount/data/GoGo/data/infos/nuscenes_infos_temporal_{set}.pkl', 'rb'))            \n        # traj_data = None\n        for id in range(len(dataset['infos'])):\n            if id % 10 == 0:\n                print('%d/%d' % (id, len(dataset['infos'])))\n            info = dataset['infos'][id]\n            # get sweep adjacent frame info\n            sample = nuscenes.get('sample', info['token'])\n            ann_infos = list()\n            for ann in sample['anns']:\n                ann_info = nuscenes.get('sample_annotation', ann)\n                velocity = nuscenes.box_velocity(ann_info['token'])\n                if np.any(np.isnan(velocity)):\n                    velocity = np.zeros(3)\n                ann_info['velocity'] = velocity\n                ann_infos.append(ann_info)\n            dataset['infos'][id]['ann_infos'] = ann_infos\n            # traj_info = traj_data['infos'][id] if traj_data is not None else None\n            future_traj_all, future_traj_valid_mask_all = dataset['infos'][id]['fut_traj'],  dataset['infos'][id]['fut_traj_valid_mask']\n            gt_boxes_3d, gt_labels_3d, fut_traj, fut_traj_mask, valid_flag, gt_boxes_3d_in_global = get_gt(dataset['infos'][id], future_traj_all, future_traj_valid_mask_all)\n\n            dataset['infos'][id]['ann_infos'] = {}\n            if fut_traj is not None:\n                dataset['infos'][id]['ann_infos']['fut_traj'] = fut_traj\n                dataset['infos'][id]['ann_infos']['fut_traj_mask'] = fut_traj_mask\n            dataset['infos'][id]['ann_infos']['gt_boxes_2d'] = dataset['infos'][id]['bboxes2d']\n            dataset['infos'][id]['ann_infos']['gt_labels_2d'] = dataset['infos'][id]['labels2d']\n            dataset['infos'][id]['ann_infos']['depths'] = dataset['infos'][id]['depths']            \n            dataset['infos'][id]['ann_infos']['centers2d'] = dataset['infos'][id]['centers2d']\n\n            dataset['infos'][id]['ann_infos']['gt_boxes_3d'] = gt_boxes_3d\n            dataset['infos'][id]['ann_infos']['gt_boxes_3d_in_global'] = gt_boxes_3d_in_global\n            dataset['infos'][id]['ann_infos']['gt_labels_3d'] = gt_labels_3d\n            dataset['infos'][id]['scene_token'] = sample['scene_token']\n            scene = nuscenes.get('scene',  sample['scene_token'])\n            map_location = nuscenes.get('log', scene['log_token'])['location']\n            dataset['infos'][id]['map_location'] = map_location\n            dataset['infos'][id]['scene_name'] = scene['name']\n            dataset['infos'][id]['prev'] = sample['prev']\n            \n            annotations = [\n                nuscenes.get('sample_annotation', token)\n                for token in sample['anns']\n            ]\n\n            instance_inds = [nuscenes.getind('instance', ann['instance_token']) for ann in annotations]\n            info['instance_inds'] = instance_inds\n            info['valid_flag'] = valid_flag\n\n            # description = scene['description']\n            if with_lidar_seg:\n                lidar_sd_token = sample['data']['LIDAR_TOP']\n                dataset['infos'][id]['lidarseg_filename'] =  nuscenes.get('lidarseg', lidar_sd_token)['filename']\n            scene = nuscenes.get('scene', sample['scene_token'])\n            dataset['infos'][id]['occ_path'] = \\\n                
'./data/nuscenes/gts/%s/%s'%(scene['name'], info['token'])\n        with open('./data/%s/%s_infos_%s.pkl' % (NUSCENES, extra_tag, set),\n                  'wb') as fid:\n            pickle.dump(dataset, fid)\n\n\nif __name__ == '__main__':\n    dataset = 'nuscenes'\n    version = 'v1.0'\n    train_version = VERSION\n    root_path = f'./data/{NUSCENES}'\n    extra_tag = 'bev-next-nuscenes'\n    nuscenes_data_prep(\n        root_path=root_path,\n        info_prefix=extra_tag,\n        version=train_version,\n        max_sweeps=10)\n\n    print('add_ann_infos')\n    add_ann_adj_info(extra_tag)\n"
  },
  {
    "path": "tools/data_converter/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n"
  },
  {
    "path": "tools/data_converter/create_gt_database.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pickle\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom mmcv import track_iter_progress\nfrom mmcv.ops import roi_align\nfrom pycocotools import mask as maskUtils\nfrom pycocotools.coco import COCO\n\nfrom mmdet3d.core.bbox import box_np_ops as box_np_ops\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\n\n\ndef _poly2mask(mask_ann, img_h, img_w):\n    if isinstance(mask_ann, list):\n        # polygon -- a single object might consist of multiple parts\n        # we merge all parts into one mask rle code\n        rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)\n        rle = maskUtils.merge(rles)\n    elif isinstance(mask_ann['counts'], list):\n        # uncompressed RLE\n        rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)\n    else:\n        # rle\n        rle = mask_ann\n    mask = maskUtils.decode(rle)\n    return mask\n\n\ndef _parse_coco_ann_info(ann_info):\n    gt_bboxes = []\n    gt_labels = []\n    gt_bboxes_ignore = []\n    gt_masks_ann = []\n\n    for i, ann in enumerate(ann_info):\n        if ann.get('ignore', False):\n            continue\n        x1, y1, w, h = ann['bbox']\n        if ann['area'] <= 0:\n            continue\n        bbox = [x1, y1, x1 + w, y1 + h]\n        if ann.get('iscrowd', False):\n            gt_bboxes_ignore.append(bbox)\n        else:\n            gt_bboxes.append(bbox)\n            gt_masks_ann.append(ann['segmentation'])\n\n    if gt_bboxes:\n        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)\n        gt_labels = np.array(gt_labels, dtype=np.int64)\n    else:\n        gt_bboxes = np.zeros((0, 4), dtype=np.float32)\n        gt_labels = np.array([], dtype=np.int64)\n\n    if gt_bboxes_ignore:\n        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)\n    else:\n        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)\n\n    ann = dict(\n        bboxes=gt_bboxes, bboxes_ignore=gt_bboxes_ignore, masks=gt_masks_ann)\n\n    return ann\n\n\ndef crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):\n    import torch\n    from torch.nn.modules.utils import _pair\n    device = pos_proposals.device\n    num_pos = pos_proposals.size(0)\n    fake_inds = (\n        torch.arange(num_pos,\n                     device=device).to(dtype=pos_proposals.dtype)[:, None])\n    rois = torch.cat([fake_inds, pos_proposals], dim=1)  # Nx5\n    mask_size = _pair(28)\n    rois = rois.to(device=device)\n    gt_masks_th = (\n        torch.from_numpy(gt_masks).to(device).index_select(\n            0, pos_assigned_gt_inds).to(dtype=rois.dtype))\n    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)\n    targets = (\n        roi_align(gt_masks_th, rois, mask_size[::-1], 1.0, 0, True).squeeze(1))\n    return targets\n\n\ndef crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):\n    num_pos = pos_proposals.shape[0]\n    masks = []\n    img_patches = []\n    for i in range(num_pos):\n        gt_mask = gt_masks[pos_assigned_gt_inds[i]]\n        bbox = pos_proposals[i, :].astype(np.int32)\n        x1, y1, x2, y2 = bbox\n        w = np.maximum(x2 - x1 + 1, 1)\n        h = np.maximum(y2 - y1 + 1, 1)\n\n        mask_patch = gt_mask[y1:y1 + h, x1:x1 + w]\n        masked_img = gt_mask[..., None] * org_img\n        img_patch = masked_img[y1:y1 + h, x1:x1 + w]\n\n        img_patches.append(img_patch)\n        masks.append(mask_patch)\n    return img_patches, 
masks\n\n\ndef create_groundtruth_database(dataset_class_name,\n                                data_path,\n                                info_prefix,\n                                info_path=None,\n                                gap=0,\n                                mask_anno_path=None,\n                                used_classes=None,\n                                database_save_path=None,\n                                db_info_save_path=None,\n                                relative_path=True,\n                                add_rgb=False,\n                                lidar_only=False,\n                                bev_only=False,\n                                coors_range=None,\n                                with_mask=False):\n    \"\"\"Given the raw data, generate the ground truth database.\n\n    Args:\n        dataset_class_name (str): Name of the input dataset.\n        data_path (str): Path of the data.\n        info_prefix (str): Prefix of the info file.\n        info_path (str, optional): Path of the info file.\n            Default: None.\n        mask_anno_path (str, optional): Path of the mask_anno.\n            Default: None.\n        used_classes (list[str], optional): Classes have been used.\n            Default: None.\n        database_save_path (str, optional): Path to save database.\n            Default: None.\n        db_info_save_path (str, optional): Path to save db_info.\n            Default: None.\n        relative_path (bool, optional): Whether to use relative path.\n            Default: True.\n        with_mask (bool, optional): Whether to use mask.\n            Default: False.\n    \"\"\"\n    print(f'Create GT Database of {dataset_class_name}')\n    dataset_cfg = dict(\n        type=dataset_class_name, data_root=data_path, ann_file=info_path)\n    if dataset_class_name == 'KittiDataset':\n        file_client_args = dict(backend='disk')\n        dataset_cfg.update(\n            test_mode=False,\n            split='training',\n            modality=dict(\n                use_lidar=True,\n                use_depth=False,\n                use_lidar_intensity=True,\n                use_camera=with_mask,\n            ),\n            pipeline=[\n                dict(\n                    type='LoadPointsFromFile',\n                    coord_type='LIDAR',\n                    load_dim=4,\n                    use_dim=4,\n                    file_client_args=file_client_args),\n                dict(\n                    type='LoadAnnotations3D',\n                    with_bbox_3d=True,\n                    with_label_3d=True,\n                    file_client_args=file_client_args)\n            ])\n\n    elif dataset_class_name == 'NuScenesDataset':\n\n            class_names = [\n                'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n                'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n            ]\n            file_client_args = dict(backend='disk')\n            data_config = {\n                'cams': [\n                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n                    'CAM_BACK', 'CAM_BACK_RIGHT'\n                ],\n                'Ncams':\n                6,\n                'input_size': (256, 704),\n                'src_size': (900, 1600),\n\n                # Augmentation\n                'resize': (-0.06, 0.11),\n                'rot': (-5.4, 5.4),\n                'flip': True,\n                'crop_h': (0.0, 0.0),\n                'resize_test': 
0.00,\n            }\n            input_modality = dict(\n                    use_lidar=False,\n                    use_camera=True,\n                    use_radar=False,\n                    use_map=False,\n                    use_external=False)\n\n            dataset_cfg.update(\n                img_info_prototype='bevdet',\n                use_valid_flag=True,\n                box_type_3d='LiDAR',\n                modality=input_modality,\n                test_mode=True,\n                pipeline=[\n                    dict(type='PrepareImageInputs', \n                            data_config=data_config,\n                            is_train=False,\n                        ),\n                    dict(\n                            type='LoadAnnotationsBEVDepth',\n                            bda_aug_conf=None,\n                            is_train=False, \n                            classes=class_names),\n                    dict(\n                            type='LoadPointsFromFile',\n                            coord_type='LIDAR',\n                            dtype='float32',\n                            load_dim=5,\n                            use_dim=[0, 1, 2, 3, 4],\n                            translate2ego=False,\n                            file_client_args=file_client_args),\n                    dict(\n                            type='LoadPointsFromMultiSweeps',\n                            sweeps_num=10,\n                            use_dim=[0, 1, 2, 3, 4],\n                            file_client_args=file_client_args,\n                            pad_empty_sweeps=True,\n                            translate2ego=False,\n                            remove_close=True),\n\n                    # dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n                    dict(type='PointsFromLidartoEgo'),\n                    ])\n\n\n    elif dataset_class_name == 'WaymoDataset':\n        file_client_args = dict(backend='disk')\n        dataset_cfg.update(\n            test_mode=False,\n            split='training',\n            modality=dict(\n                use_lidar=True,\n                use_depth=False,\n                use_lidar_intensity=True,\n                use_camera=False,\n            ),\n            pipeline=[\n                dict(\n                    type='LoadPointsFromFile',\n                    coord_type='LIDAR',\n                    load_dim=6,\n                    use_dim=6,\n                    file_client_args=file_client_args),\n                dict(\n                    type='LoadAnnotations3D',\n                    with_bbox_3d=True,\n                    with_label_3d=True,\n                    file_client_args=file_client_args)\n            ])\n\n    dataset = build_dataset(dataset_cfg)\n\n    if database_save_path is None:\n        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')\n    if db_info_save_path is None:\n        db_info_save_path = osp.join(data_path,\n                                     f'{info_prefix}_dbinfos_train.pkl')\n    mmcv.mkdir_or_exist(database_save_path)\n    all_db_infos = dict()\n    if with_mask:\n        coco = COCO(osp.join(data_path, mask_anno_path))\n        imgIds = coco.getImgIds()\n        file2id = dict()\n        for i in imgIds:\n            info = coco.loadImgs([i])[0]\n            file2id.update({info['file_name']: i})\n\n    group_counter = 0\n    for j in track_iter_progress(list(range(gap, len(dataset), 8))):\n        input_dict = dataset.get_data_info(j)\n        
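# Feed the raw sample info through the dataset pipeline to obtain points and 3D boxes for this frame.\n        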
dataset.pre_pipeline(input_dict)\n        example = dataset.pipeline(input_dict)\n\n        \n        # annos = example['ann_info']\n        annos = {}\n        image_idx = example['sample_idx']\n        points = example['points'].tensor.numpy()\n        gt_boxes_3d = example['gt_bboxes_3d'].tensor.numpy()\n        class_names = [\n                'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n                'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ]\n        names =  [class_names[i] for i in  example['gt_labels_3d']]\n        # annos['gt_names']\n        group_dict = dict()\n        if 'group_ids' in annos:\n            group_ids = annos['group_ids']\n        else:\n            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)\n        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)\n        if 'difficulty' in annos:\n            difficulty = annos['difficulty']\n\n        num_obj = gt_boxes_3d.shape[0]\n        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)\n\n        if with_mask:\n            # prepare masks\n            gt_boxes = annos['gt_bboxes']\n            img_path = osp.split(example['img_info']['filename'])[-1]\n            if img_path not in file2id.keys():\n                print(f'skip image {img_path} for empty mask')\n                continue\n            img_id = file2id[img_path]\n            kins_annIds = coco.getAnnIds(imgIds=img_id)\n            kins_raw_info = coco.loadAnns(kins_annIds)\n            kins_ann_info = _parse_coco_ann_info(kins_raw_info)\n            h, w = annos['img_shape'][:2]\n            gt_masks = [\n                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']\n            ]\n            # get mask inds based on iou mapping\n            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)\n            mask_inds = bbox_iou.argmax(axis=0)\n            valid_inds = (bbox_iou.max(axis=0) > 0.5)\n\n            # mask the image\n            # use more precise crop when it is ready\n            # object_img_patches = np.ascontiguousarray(\n            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))\n            # crop image patches using roi_align\n            # object_img_patches = crop_image_patch_v2(\n            #     torch.Tensor(gt_boxes),\n            #     torch.Tensor(mask_inds).long(), object_img_patches)\n            object_img_patches, object_masks = crop_image_patch(\n                gt_boxes, gt_masks, mask_inds, annos['img'])\n\n        for i in range(num_obj):\n            filename = f'{image_idx}_{names[i]}_{i}.bin'\n            abs_filepath = osp.join(database_save_path, filename)\n            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)\n\n            # save point clouds and image patches for each object\n            gt_points = points[point_indices[:, i]]\n            gt_points[:, :3] -= gt_boxes_3d[i, :3]\n\n            if with_mask:\n                if object_masks[i].sum() == 0 or not valid_inds[i]:\n                    # Skip object for empty or invalid mask\n                    continue\n                img_patch_path = abs_filepath + '.png'\n                mask_patch_path = abs_filepath + '.mask.png'\n                mmcv.imwrite(object_img_patches[i], img_patch_path)\n                mmcv.imwrite(object_masks[i], mask_patch_path)\n\n            with open(abs_filepath, 'w') as f:\n                gt_points.tofile(f)\n\n            if (used_classes is None) or names[i] in used_classes:\n    
            db_info = {\n                    'name': names[i],\n                    'path': rel_filepath,\n                    'image_idx': image_idx,\n                    'gt_idx': i,\n                    'box3d_lidar': gt_boxes_3d[i],\n                    'num_points_in_gt': gt_points.shape[0],\n                    'difficulty': difficulty[i],\n                }\n                local_group_id = group_ids[i]\n                # if local_group_id >= 0:\n                if local_group_id not in group_dict:\n                    group_dict[local_group_id] = group_counter\n                    group_counter += 1\n                db_info['group_id'] = group_dict[local_group_id]\n                if 'score' in annos:\n                    db_info['score'] = annos['score'][i]\n                if with_mask:\n                    db_info.update({'box2d_camera': gt_boxes[i]})\n                if names[i] in all_db_infos:\n                    all_db_infos[names[i]].append(db_info)\n                else:\n                    all_db_infos[names[i]] = [db_info]\n\n    for k, v in all_db_infos.items():\n        print(f'load {len(v)} {k} database infos')\n\n    with open(db_info_save_path, 'wb') as f:\n        pickle.dump(all_db_infos, f)\n\n\nclass GTDatabaseCreater:\n    \"\"\"Given the raw data, generate the ground truth database. This is the\n    parallel version. For serialized version, please refer to\n    `create_groundtruth_database`\n\n    Args:\n        dataset_class_name (str): Name of the input dataset.\n        data_path (str): Path of the data.\n        info_prefix (str): Prefix of the info file.\n        info_path (str, optional): Path of the info file.\n            Default: None.\n        mask_anno_path (str, optional): Path of the mask_anno.\n            Default: None.\n        used_classes (list[str], optional): Classes have been used.\n            Default: None.\n        database_save_path (str, optional): Path to save database.\n            Default: None.\n        db_info_save_path (str, optional): Path to save db_info.\n            Default: None.\n        relative_path (bool, optional): Whether to use relative path.\n            Default: True.\n        with_mask (bool, optional): Whether to use mask.\n            Default: False.\n        num_worker (int, optional): the number of parallel workers to use.\n            Default: 8.\n    \"\"\"\n\n    def __init__(self,\n                 dataset_class_name,\n                 data_path,\n                 info_prefix,\n                 info_path=None,\n                 mask_anno_path=None,\n                 used_classes=None,\n                 database_save_path=None,\n                 db_info_save_path=None,\n                 relative_path=True,\n                 add_rgb=False,\n                 lidar_only=False,\n                 bev_only=False,\n                 coors_range=None,\n                 with_mask=False,\n                 num_worker=8) -> None:\n        self.dataset_class_name = dataset_class_name\n        self.data_path = data_path\n        self.info_prefix = info_prefix\n        self.info_path = info_path\n        self.mask_anno_path = mask_anno_path\n        self.used_classes = used_classes\n        self.database_save_path = database_save_path\n        self.db_info_save_path = db_info_save_path\n        self.relative_path = relative_path\n        self.add_rgb = add_rgb\n        self.lidar_only = lidar_only\n        self.bev_only = bev_only\n        self.coors_range = coors_range\n        self.with_mask = with_mask\n        
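# Number of parallel workers later passed to mmcv.track_parallel_progress in create().\n        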
self.num_worker = num_worker\n        self.pipeline = None\n\n    def create_single(self, input_dict):\n        group_counter = 0\n        single_db_infos = dict()\n        example = self.pipeline(input_dict)\n        annos = example['ann_info']\n        image_idx = example['sample_idx']\n        points = example['points'].tensor.numpy()\n        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()\n        names = annos['gt_names']\n        group_dict = dict()\n        if 'group_ids' in annos:\n            group_ids = annos['group_ids']\n        else:\n            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)\n        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)\n        if 'difficulty' in annos:\n            difficulty = annos['difficulty']\n\n        num_obj = gt_boxes_3d.shape[0]\n        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)\n\n        if self.with_mask:\n            # prepare masks\n            gt_boxes = annos['gt_bboxes']\n            img_path = osp.split(example['img_info']['filename'])[-1]\n            if img_path not in self.file2id.keys():\n                print(f'skip image {img_path} for empty mask')\n                return single_db_infos\n            img_id = self.file2id[img_path]\n            kins_annIds = self.coco.getAnnIds(imgIds=img_id)\n            kins_raw_info = self.coco.loadAnns(kins_annIds)\n            kins_ann_info = _parse_coco_ann_info(kins_raw_info)\n            h, w = annos['img_shape'][:2]\n            gt_masks = [\n                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']\n            ]\n            # get mask inds based on iou mapping\n            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)\n            mask_inds = bbox_iou.argmax(axis=0)\n            valid_inds = (bbox_iou.max(axis=0) > 0.5)\n\n            # mask the image\n            # use more precise crop when it is ready\n            # object_img_patches = np.ascontiguousarray(\n            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))\n            # crop image patches using roi_align\n            # object_img_patches = crop_image_patch_v2(\n            #     torch.Tensor(gt_boxes),\n            #     torch.Tensor(mask_inds).long(), object_img_patches)\n            object_img_patches, object_masks = crop_image_patch(\n                gt_boxes, gt_masks, mask_inds, annos['img'])\n\n        for i in range(num_obj):\n            filename = f'{image_idx}_{names[i]}_{i}.bin'\n            abs_filepath = osp.join(self.database_save_path, filename)\n            rel_filepath = osp.join(f'{self.info_prefix}_gt_database',\n                                    filename)\n\n            # save point clouds and image patches for each object\n            gt_points = points[point_indices[:, i]]\n            gt_points[:, :3] -= gt_boxes_3d[i, :3]\n\n            if self.with_mask:\n                if object_masks[i].sum() == 0 or not valid_inds[i]:\n                    # Skip object for empty or invalid mask\n                    continue\n                img_patch_path = abs_filepath + '.png'\n                mask_patch_path = abs_filepath + '.mask.png'\n                mmcv.imwrite(object_img_patches[i], img_patch_path)\n                mmcv.imwrite(object_masks[i], mask_patch_path)\n\n            with open(abs_filepath, 'w') as f:\n                gt_points.tofile(f)\n\n            if (self.used_classes is None) or names[i] in self.used_classes:\n                db_info = {\n                    'name': 
names[i],\n                    'path': rel_filepath,\n                    'image_idx': image_idx,\n                    'gt_idx': i,\n                    'box3d_lidar': gt_boxes_3d[i],\n                    'num_points_in_gt': gt_points.shape[0],\n                    'difficulty': difficulty[i],\n                }\n                local_group_id = group_ids[i]\n                # if local_group_id >= 0:\n                if local_group_id not in group_dict:\n                    group_dict[local_group_id] = group_counter\n                    group_counter += 1\n                db_info['group_id'] = group_dict[local_group_id]\n                if 'score' in annos:\n                    db_info['score'] = annos['score'][i]\n                if self.with_mask:\n                    db_info.update({'box2d_camera': gt_boxes[i]})\n                if names[i] in single_db_infos:\n                    single_db_infos[names[i]].append(db_info)\n                else:\n                    single_db_infos[names[i]] = [db_info]\n\n        return single_db_infos\n\n    def create(self):\n        print(f'Create GT Database of {self.dataset_class_name}')\n        dataset_cfg = dict(\n            type=self.dataset_class_name,\n            data_root=self.data_path,\n            ann_file=self.info_path)\n        if self.dataset_class_name == 'KittiDataset':\n            file_client_args = dict(backend='disk')\n            dataset_cfg.update(\n                test_mode=False,\n                split='training',\n                modality=dict(\n                    use_lidar=True,\n                    use_depth=False,\n                    use_lidar_intensity=True,\n                    use_camera=self.with_mask,\n                ),\n                pipeline=[\n                    dict(\n                        type='LoadPointsFromFile',\n                        coord_type='LIDAR',\n                        load_dim=4,\n                        use_dim=4,\n                        file_client_args=file_client_args),\n                    dict(\n                        type='LoadAnnotations3D',\n                        with_bbox_3d=True,\n                        with_label_3d=True,\n                        file_client_args=file_client_args)\n                ])\n\n        elif self.dataset_class_name == 'NuScenesDataset':\n            dataset_cfg.update(\n                img_info_prototype='bevdet',\n                use_valid_flag=True,\n                box_type_3d='LiDAR',\n                test_mode=True,\n                pipeline=[\n                    dict(type='PrepareImageInputs', \n                            is_train=False,\n                        ),\n                    dict(\n                            type='LoadAnnotationsBEVDepth',\n                            bda_aug_conf=None,\n                            is_train=False, \n                            classes=class_names),\n                    dict(\n                            type='LoadPointsFromFile',\n                            coord_type='LIDAR',\n                            dtype='float32',\n                            load_dim=5,\n                            use_dim=[0, 1, 2, 3, 4],\n                            translate2ego=False,\n                            file_client_args=file_client_args),\n                    dict(\n                            type='LoadPointsFromMultiSweeps',\n                            sweeps_num=10,\n                            use_dim=[0, 1, 2, 3, 4],\n                            file_client_args=file_client_args,\n               
             pad_empty_sweeps=True,\n                            translate2ego=False,\n                            remove_close=True),\n\n                    # dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n                    dict(type='PointsFromLidartoEgo'),\n                    ])\n\n        elif self.dataset_class_name == 'WaymoDataset':\n            file_client_args = dict(backend='disk')\n            dataset_cfg.update(\n                test_mode=False,\n                split='training',\n                modality=dict(\n                    use_lidar=True,\n                    use_depth=False,\n                    use_lidar_intensity=True,\n                    use_camera=False,\n                ),\n                pipeline=[\n                    dict(\n                        type='LoadPointsFromFile',\n                        coord_type='LIDAR',\n                        load_dim=6,\n                        use_dim=6,\n                        file_client_args=file_client_args),\n                    dict(\n                        type='LoadAnnotations3D',\n                        with_bbox_3d=True,\n                        with_label_3d=True,\n                        file_client_args=file_client_args)\n                ])\n\n        dataset = build_dataset(dataset_cfg)\n        self.pipeline = dataset.pipeline\n        if self.database_save_path is None:\n            self.database_save_path = osp.join(\n                self.data_path, f'{self.info_prefix}_gt_database')\n        if self.db_info_save_path is None:\n            self.db_info_save_path = osp.join(\n                self.data_path, f'{self.info_prefix}_dbinfos_train.pkl')\n        mmcv.mkdir_or_exist(self.database_save_path)\n        if self.with_mask:\n            self.coco = COCO(osp.join(self.data_path, self.mask_anno_path))\n            imgIds = self.coco.getImgIds()\n            self.file2id = dict()\n            for i in imgIds:\n                info = self.coco.loadImgs([i])[0]\n                self.file2id.update({info['file_name']: i})\n\n        def loop_dataset(i):\n            input_dict = dataset.get_data_info(i)\n            dataset.pre_pipeline(input_dict)\n            return input_dict\n\n        multi_db_infos = mmcv.track_parallel_progress(\n            self.create_single, ((loop_dataset(i)\n                                  for i in range(len(dataset))), len(dataset)),\n            self.num_worker)\n        print('Make global unique group id')\n        group_counter_offset = 0\n        all_db_infos = dict()\n        for single_db_infos in track_iter_progress(multi_db_infos):\n            group_id = -1\n            for name, name_db_infos in single_db_infos.items():\n                for db_info in name_db_infos:\n                    group_id = max(group_id, db_info['group_id'])\n                    db_info['group_id'] += group_counter_offset\n                if name not in all_db_infos:\n                    all_db_infos[name] = []\n                all_db_infos[name].extend(name_db_infos)\n            group_counter_offset += (group_id + 1)\n\n        for k, v in all_db_infos.items():\n            print(f'load {len(v)} {k} database infos')\n\n        with open(self.db_info_save_path, 'wb') as f:\n            pickle.dump(all_db_infos, f)\nimport argparse\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description='gap')\n    parser.add_argument('gap', default=0, type=int, help='gap')\n    args = parser.parse_args()\n    create_groundtruth_database('NuScenesDataset', 
'/mount/data/lsbevv2/data/nuscenes', 'bevdetv2-nuscenes',\n                                '/mount/data/lsbevv2/data/nuscenes/bevdetv2-nuscenes_infos_train.pkl', gap=args.gap)\n"
  },
  {
    "path": "tools/data_converter/imgaug_demo.py",
    "content": "\n\n#!usr/bin/python\n# -*- coding: utf-8 -*-\n\n\nimport cv2\nimport random\nimport os\nimport os.path as osp\nfrom matplotlib import pyplot as plt\n# import albumentations as A\nfrom imgaug import augmenters as iaa\nfrom nuscenes import NuScenes\nfrom nuscenes.utils import splits\nfog_aug = iaa.Fog()\nsnow_aug = iaa.Snowflakes(flake_size=(0.7, 0.95), speed=(0.001, 0.03))\nrain_aug = iaa.Rain(drop_size=(0.10, 0.20))\nnoise_aug = iaa.imgcorruptlike.GaussianNoise(severity=1)\n# transform = A.Compose(\n#     [A.RandomSunFlare(flare_roi=(0, 0, 1, 0.5), angle_lower=0.5, p=1)],\n# )\nimport mmcv \ndef get_available_scenes(nusc):\n    \"\"\"Get available scenes from the input nuscenes class.\n\n    Given the raw data, get the information of available scenes for\n    further info generation.\n\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n\n    Returns:\n        available_scenes (list[dict]): List of basic information for the\n            available scenes.\n    \"\"\"\n    available_scenes = []\n    print('total scene num: {}'.format(len(nusc.scene)))\n    for scene in nusc.scene:\n        scene_token = scene['token']\n        scene_rec = nusc.get('scene', scene_token)\n        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])\n        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])\n        has_more_frames = True\n        scene_not_exist = False\n        while has_more_frames:\n            lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])\n            lidar_path = str(lidar_path)\n            if os.getcwd() in lidar_path:\n                # path from lyftdataset is absolute path\n                lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]\n                # relative path\n            if not mmcv.is_filepath(lidar_path):\n                scene_not_exist = True\n                break\n            else:\n                break\n        if scene_not_exist:\n            continue\n        available_scenes.append(scene)\n    print('exist scene num: {}'.format(len(available_scenes)))\n    return available_scenes\n\nVERSION= 'v1.0-trainval'\nNUSCENES = 'nuscenes'\nnuscenes_version = VERSION\ndataroot = f'./data/{NUSCENES}/'\nnuscenes = NuScenes(nuscenes_version, dataroot)\nval_scenes = splits.val\n# filter existing scenes.\navailable_scenes = get_available_scenes(nuscenes)\navailable_scene_names = [s['name'] for s in available_scenes]\nval_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))\nval_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in val_scenes\n   ])\n\nval_imgs = set()\nfor sample in mmcv.track_iter_progress(nuscenes.sample):\n   camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n   ]\n   if sample['scene_token'] in val_scenes:\n      for cam in camera_types:\n         cam_token = sample['data'][cam]\n         cam_path, _, cam_intrinsic = nuscenes.get_sample_data(cam_token)\n         val_imgs.add(cam_path.split('/')[-1])\n\n\naug_mapper = dict(\n   fog=iaa.Fog(),\n   snow=iaa.Snowflakes(flake_size=(0.7, 0.95), speed=(0.001, 0.03)),\n   rain=iaa.Rain(drop_size=(0.10, 0.20)),\n   noise=iaa.imgcorruptlike.GaussianNoise(severity=1)\n)\n#imgaug test\n\nori_sample_path = '/mount/data/FBBEV/data/nuscenes/samples'\ndet_sample_path = '/mount/data/FBBEV/data/nuscenes_aug/samples_rain'\ncams = 
os.listdir(det_sample_path)\nfor cam in cams:\n   imgs = os.listdir(osp.join(ori_sample_path, cam))\n   for img_name in imgs:\n      imglist=[]\n      if img_name not in val_imgs: continue\n      img_path = osp.join(ori_sample_path, cam, img_name)\n      print(img_path)\n      img = cv2.imread(img_path)\n      img = cv2.resize(img, (800, 450))\n      imglist.append(img)\n      augs = ['noise']# ['fog', 'rain', 'snow', 'noise']\n      for aug_key in augs:\n         \n         seq = iaa.Sequential([\n             aug_mapper[aug_key]\n         ])\n         images_aug = seq.augment_images(imglist)\n         images_aug = cv2.resize(images_aug[0], (1600, 900))\n         # print(f'/mount/data/FBBEV/data/nuscenes_aug/samples_{aug_key}/{cam}/{img_name}')\n         cv2.imwrite(f'/mount/data/FBBEV/data/nuscenes_aug/samples_{aug_key}/{cam}/{img_name}', images_aug)\n\n\n    #   images_aug = transform(image=img)['image']\n    #   images_aug = cv2.resize(images_aug, (1600, 900))\n    #   cv2.imwrite(f'/mount/data/FBBEV/data/nuscenes_aug/samples_sun/{cam}/{img_name}', images_aug)\n\n\n\n\n"
  },
  {
    "path": "tools/data_converter/indoor_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\n\nimport mmcv\nimport numpy as np\n\nfrom tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData\nfrom tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData\nfrom tools.data_converter.sunrgbd_data_utils import SUNRGBDData\n\n\ndef create_indoor_info_file(data_path,\n                            pkl_prefix='sunrgbd',\n                            save_path=None,\n                            workers=4,\n                            **kwargs):\n    \"\"\"Create indoor information file.\n\n    Get information of the raw data and save it to the pkl file.\n\n    Args:\n        data_path (str): Path of the data.\n        pkl_prefix (str, optional): Prefix of the pkl to be saved.\n            Default: 'sunrgbd'.\n        save_path (str, optional): Path of the pkl to be saved. Default: None.\n        workers (int, optional): Number of threads to be used. Default: 4.\n        kwargs (dict): Additional parameters for dataset-specific Data class.\n            May include `use_v1` for SUN RGB-D and `num_points`.\n    \"\"\"\n    assert os.path.exists(data_path)\n    assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \\\n        f'unsupported indoor dataset {pkl_prefix}'\n    save_path = data_path if save_path is None else save_path\n    assert os.path.exists(save_path)\n\n    # generate infos for both detection and segmentation task\n    if pkl_prefix in ['sunrgbd', 'scannet']:\n        train_filename = os.path.join(save_path,\n                                      f'{pkl_prefix}_infos_train.pkl')\n        val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')\n        if pkl_prefix == 'sunrgbd':\n            # SUN RGB-D has a train-val split\n            num_points = kwargs.get('num_points', -1)\n            use_v1 = kwargs.get('use_v1', False)\n            train_dataset = SUNRGBDData(\n                root_path=data_path,\n                split='train',\n                use_v1=use_v1,\n                num_points=num_points)\n            val_dataset = SUNRGBDData(\n                root_path=data_path,\n                split='val',\n                use_v1=use_v1,\n                num_points=num_points)\n        else:\n            # ScanNet has a train-val-test split\n            train_dataset = ScanNetData(root_path=data_path, split='train')\n            val_dataset = ScanNetData(root_path=data_path, split='val')\n            test_dataset = ScanNetData(root_path=data_path, split='test')\n            test_filename = os.path.join(save_path,\n                                         f'{pkl_prefix}_infos_test.pkl')\n\n        infos_train = train_dataset.get_infos(\n            num_workers=workers, has_label=True)\n        mmcv.dump(infos_train, train_filename, 'pkl')\n        print(f'{pkl_prefix} info train file is saved to {train_filename}')\n\n        infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)\n        mmcv.dump(infos_val, val_filename, 'pkl')\n        print(f'{pkl_prefix} info val file is saved to {val_filename}')\n\n    if pkl_prefix == 'scannet':\n        infos_test = test_dataset.get_infos(\n            num_workers=workers, has_label=False)\n        mmcv.dump(infos_test, test_filename, 'pkl')\n        print(f'{pkl_prefix} info test file is saved to {test_filename}')\n\n    # generate infos for the semantic segmentation task\n    # e.g. 
re-sampled scene indexes and label weights\n    # scene indexes are used to re-sample rooms with different number of points\n    # label weights are used to balance classes with different number of points\n    if pkl_prefix == 'scannet':\n        # label weight computation function is adopted from\n        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24\n        num_points = kwargs.get('num_points', 8192)\n        train_dataset = ScanNetSegData(\n            data_root=data_path,\n            ann_file=train_filename,\n            split='train',\n            num_points=num_points,\n            label_weight_func=lambda x: 1.0 / np.log(1.2 + x))\n        # TODO: do we need to generate on val set?\n        val_dataset = ScanNetSegData(\n            data_root=data_path,\n            ann_file=val_filename,\n            split='val',\n            num_points=num_points,\n            label_weight_func=lambda x: 1.0 / np.log(1.2 + x))\n        # no need to generate for test set\n        train_dataset.get_seg_infos()\n        val_dataset.get_seg_infos()\n    elif pkl_prefix == 's3dis':\n        # S3DIS doesn't have a fixed train-val split\n        # it has 6 areas instead, so we generate info file for each of them\n        # in training, we will use dataset to wrap different areas\n        splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]]\n        for split in splits:\n            dataset = S3DISData(root_path=data_path, split=split)\n            info = dataset.get_infos(num_workers=workers, has_label=True)\n            filename = os.path.join(save_path,\n                                    f'{pkl_prefix}_infos_{split}.pkl')\n            mmcv.dump(info, filename, 'pkl')\n            print(f'{pkl_prefix} info {split} file is saved to {filename}')\n            num_points = kwargs.get('num_points', 4096)\n            seg_dataset = S3DISSegData(\n                data_root=data_path,\n                ann_file=filename,\n                split=split,\n                num_points=num_points,\n                label_weight_func=lambda x: 1.0 / np.log(1.2 + x))\n            seg_dataset.get_seg_infos()\n"
  },
  {
    "path": "tools/data_converter/kitti_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom collections import OrderedDict\nfrom pathlib import Path\n\nimport mmcv\nimport numpy as np\nfrom nuscenes.utils.geometry_utils import view_points\n\nfrom mmdet3d.core.bbox import box_np_ops, points_cam2img\nfrom .kitti_data_utils import WaymoInfoGatherer, get_kitti_image_info\nfrom .nuscenes_converter import post_process_coords\n\nkitti_categories = ('Pedestrian', 'Cyclist', 'Car')\n\n\ndef convert_to_kitti_info_version2(info):\n    \"\"\"convert kitti info v1 to v2 if possible.\n\n    Args:\n        info (dict): Info of the input kitti data.\n            - image (dict): image info\n            - calib (dict): calibration info\n            - point_cloud (dict): point cloud info\n    \"\"\"\n    if 'image' not in info or 'calib' not in info or 'point_cloud' not in info:\n        info['image'] = {\n            'image_shape': info['img_shape'],\n            'image_idx': info['image_idx'],\n            'image_path': info['img_path'],\n        }\n        info['calib'] = {\n            'R0_rect': info['calib/R0_rect'],\n            'Tr_velo_to_cam': info['calib/Tr_velo_to_cam'],\n            'P2': info['calib/P2'],\n        }\n        info['point_cloud'] = {\n            'velodyne_path': info['velodyne_path'],\n        }\n\n\ndef _read_imageset_file(path):\n    with open(path, 'r') as f:\n        lines = f.readlines()\n    return [int(line) for line in lines]\n\n\nclass _NumPointsInGTCalculater:\n    \"\"\"Calculate the number of points inside the ground truth box. This is the\n    parallel version. For the serialized version, please refer to\n    `_calculate_num_points_in_gt`.\n\n    Args:\n        data_path (str): Path of the data.\n        relative_path (bool): Whether to use relative path.\n        remove_outside (bool, optional): Whether to remove points which are\n            outside of image. 
Default: True.\n        num_features (int, optional): Number of features per point.\n            Default: 4.\n        num_worker (int, optional): The number of parallel workers to use.\n            Default: 8.\n    \"\"\"\n\n    def __init__(self,\n                 data_path,\n                 relative_path,\n                 remove_outside=True,\n                 num_features=4,\n                 num_worker=8) -> None:\n        self.data_path = data_path\n        self.relative_path = relative_path\n        self.remove_outside = remove_outside\n        self.num_features = num_features\n        self.num_worker = num_worker\n\n    def calculate_single(self, info):\n        pc_info = info['point_cloud']\n        image_info = info['image']\n        calib = info['calib']\n        if self.relative_path:\n            v_path = str(Path(self.data_path) / pc_info['velodyne_path'])\n        else:\n            v_path = pc_info['velodyne_path']\n        points_v = np.fromfile(\n            v_path, dtype=np.float32,\n            count=-1).reshape([-1, self.num_features])\n        rect = calib['R0_rect']\n        Trv2c = calib['Tr_velo_to_cam']\n        P2 = calib['P2']\n        if self.remove_outside:\n            points_v = box_np_ops.remove_outside_points(\n                points_v, rect, Trv2c, P2, image_info['image_shape'])\n        annos = info['annos']\n        num_obj = len([n for n in annos['name'] if n != 'DontCare'])\n        dims = annos['dimensions'][:num_obj]\n        loc = annos['location'][:num_obj]\n        rots = annos['rotation_y'][:num_obj]\n        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                         axis=1)\n        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(\n            gt_boxes_camera, rect, Trv2c)\n        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)\n        num_points_in_gt = indices.sum(0)\n        num_ignored = len(annos['dimensions']) - num_obj\n        num_points_in_gt = np.concatenate(\n            [num_points_in_gt, -np.ones([num_ignored])])\n        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)\n        return info\n\n    def calculate(self, infos):\n        ret_infos = mmcv.track_parallel_progress(self.calculate_single, infos,\n                                                 self.num_worker)\n        for i, ret_info in enumerate(ret_infos):\n            infos[i] = ret_info\n\n\ndef _calculate_num_points_in_gt(data_path,\n                                infos,\n                                relative_path,\n                                remove_outside=True,\n                                num_features=4):\n    for info in mmcv.track_iter_progress(infos):\n        pc_info = info['point_cloud']\n        image_info = info['image']\n        calib = info['calib']\n        if relative_path:\n            v_path = str(Path(data_path) / pc_info['velodyne_path'])\n        else:\n            v_path = pc_info['velodyne_path']\n        points_v = np.fromfile(\n            v_path, dtype=np.float32, count=-1).reshape([-1, num_features])\n        rect = calib['R0_rect']\n        Trv2c = calib['Tr_velo_to_cam']\n        P2 = calib['P2']\n        if remove_outside:\n            points_v = box_np_ops.remove_outside_points(\n                points_v, rect, Trv2c, P2, image_info['image_shape'])\n\n        # points_v = points_v[points_v[:, 0] > 0]\n        annos = info['annos']\n        num_obj = len([n for n in annos['name'] if n != 'DontCare'])\n        # annos = 
kitti.filter_kitti_anno(annos, ['DontCare'])\n        dims = annos['dimensions'][:num_obj]\n        loc = annos['location'][:num_obj]\n        rots = annos['rotation_y'][:num_obj]\n        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                         axis=1)\n        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(\n            gt_boxes_camera, rect, Trv2c)\n        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)\n        num_points_in_gt = indices.sum(0)\n        num_ignored = len(annos['dimensions']) - num_obj\n        num_points_in_gt = np.concatenate(\n            [num_points_in_gt, -np.ones([num_ignored])])\n        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)\n\n\ndef create_kitti_info_file(data_path,\n                           pkl_prefix='kitti',\n                           with_plane=False,\n                           save_path=None,\n                           relative_path=True):\n    \"\"\"Create info file of KITTI dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        data_path (str): Path of the data root.\n        pkl_prefix (str, optional): Prefix of the info file to be generated.\n            Default: 'kitti'.\n        with_plane (bool, optional): Whether to use plane information.\n            Default: False.\n        save_path (str, optional): Path to save the info file.\n            Default: None.\n        relative_path (bool, optional): Whether to use relative path.\n            Default: True.\n    \"\"\"\n    imageset_folder = Path(data_path) / 'ImageSets'\n    train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))\n    val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))\n    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))\n\n    print('Generate info. 
this may take several minutes.')\n    if save_path is None:\n        save_path = Path(data_path)\n    else:\n        save_path = Path(save_path)\n    kitti_infos_train = get_kitti_image_info(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        with_plane=with_plane,\n        image_ids=train_img_ids,\n        relative_path=relative_path)\n    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)\n    filename = save_path / f'{pkl_prefix}_infos_train.pkl'\n    print(f'Kitti info train file is saved to {filename}')\n    mmcv.dump(kitti_infos_train, filename)\n    kitti_infos_val = get_kitti_image_info(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        with_plane=with_plane,\n        image_ids=val_img_ids,\n        relative_path=relative_path)\n    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)\n    filename = save_path / f'{pkl_prefix}_infos_val.pkl'\n    print(f'Kitti info val file is saved to {filename}')\n    mmcv.dump(kitti_infos_val, filename)\n    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'\n    print(f'Kitti info trainval file is saved to {filename}')\n    mmcv.dump(kitti_infos_train + kitti_infos_val, filename)\n\n    kitti_infos_test = get_kitti_image_info(\n        data_path,\n        training=False,\n        label_info=False,\n        velodyne=True,\n        calib=True,\n        with_plane=False,\n        image_ids=test_img_ids,\n        relative_path=relative_path)\n    filename = save_path / f'{pkl_prefix}_infos_test.pkl'\n    print(f'Kitti info test file is saved to {filename}')\n    mmcv.dump(kitti_infos_test, filename)\n\n\ndef create_waymo_info_file(data_path,\n                           pkl_prefix='waymo',\n                           save_path=None,\n                           relative_path=True,\n                           max_sweeps=5,\n                           workers=8):\n    \"\"\"Create info file of waymo dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        data_path (str): Path of the data root.\n        pkl_prefix (str, optional): Prefix of the info file to be generated.\n            Default: 'waymo'.\n        save_path (str, optional): Path to save the info file.\n            Default: None.\n        relative_path (bool, optional): Whether to use relative path.\n            Default: True.\n        max_sweeps (int, optional): Max sweeps before the detection frame\n            to be used. Default: 5.\n    \"\"\"\n    imageset_folder = Path(data_path) / 'ImageSets'\n    train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))\n    val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))\n    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))\n\n    print('Generate info. 
this may take several minutes.')\n    if save_path is None:\n        save_path = Path(data_path)\n    else:\n        save_path = Path(save_path)\n    waymo_infos_gatherer_trainval = WaymoInfoGatherer(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        pose=True,\n        relative_path=relative_path,\n        max_sweeps=max_sweeps,\n        num_worker=workers)\n    waymo_infos_gatherer_test = WaymoInfoGatherer(\n        data_path,\n        training=False,\n        label_info=False,\n        velodyne=True,\n        calib=True,\n        pose=True,\n        relative_path=relative_path,\n        max_sweeps=max_sweeps,\n        num_worker=workers)\n    num_points_in_gt_calculater = _NumPointsInGTCalculater(\n        data_path,\n        relative_path,\n        num_features=6,\n        remove_outside=False,\n        num_worker=workers)\n\n    waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids)\n    num_points_in_gt_calculater.calculate(waymo_infos_train)\n    filename = save_path / f'{pkl_prefix}_infos_train.pkl'\n    print(f'Waymo info train file is saved to {filename}')\n    mmcv.dump(waymo_infos_train, filename)\n    waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids)\n    num_points_in_gt_calculater.calculate(waymo_infos_val)\n    filename = save_path / f'{pkl_prefix}_infos_val.pkl'\n    print(f'Waymo info val file is saved to {filename}')\n    mmcv.dump(waymo_infos_val, filename)\n    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'\n    print(f'Waymo info trainval file is saved to {filename}')\n    mmcv.dump(waymo_infos_train + waymo_infos_val, filename)\n    waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids)\n    filename = save_path / f'{pkl_prefix}_infos_test.pkl'\n    print(f'Waymo info test file is saved to {filename}')\n    mmcv.dump(waymo_infos_test, filename)\n\n\ndef _create_reduced_point_cloud(data_path,\n                                info_path,\n                                save_path=None,\n                                back=False,\n                                num_features=4,\n                                front_camera_id=2):\n    \"\"\"Create reduced point clouds for given info.\n\n    Args:\n        data_path (str): Path of original data.\n        info_path (str): Path of data info.\n        save_path (str, optional): Path to save reduced point cloud\n            data. Default: None.\n        back (bool, optional): Whether to flip the points to back.\n            Default: False.\n        num_features (int, optional): Number of point features. 
Default: 4.\n        front_camera_id (int, optional): The referenced/front camera ID.\n            Default: 2.\n    \"\"\"\n    kitti_infos = mmcv.load(info_path)\n\n    for info in mmcv.track_iter_progress(kitti_infos):\n        pc_info = info['point_cloud']\n        image_info = info['image']\n        calib = info['calib']\n\n        v_path = pc_info['velodyne_path']\n        v_path = Path(data_path) / v_path\n        points_v = np.fromfile(\n            str(v_path), dtype=np.float32,\n            count=-1).reshape([-1, num_features])\n        rect = calib['R0_rect']\n        if front_camera_id == 2:\n            P2 = calib['P2']\n        else:\n            P2 = calib[f'P{str(front_camera_id)}']\n        Trv2c = calib['Tr_velo_to_cam']\n        # first remove z < 0 points\n        # keep = points_v[:, -1] > 0\n        # points_v = points_v[keep]\n        # then remove outside.\n        if back:\n            points_v[:, 0] = -points_v[:, 0]\n        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,\n                                                    image_info['image_shape'])\n        if save_path is None:\n            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')\n            if not save_dir.exists():\n                save_dir.mkdir()\n            save_filename = save_dir / v_path.name\n            # save_filename = str(v_path) + '_reduced'\n            if back:\n                save_filename += '_back'\n        else:\n            save_filename = str(Path(save_path) / v_path.name)\n            if back:\n                save_filename += '_back'\n        with open(save_filename, 'w') as f:\n            points_v.tofile(f)\n\n\ndef create_reduced_point_cloud(data_path,\n                               pkl_prefix,\n                               train_info_path=None,\n                               val_info_path=None,\n                               test_info_path=None,\n                               save_path=None,\n                               with_back=False):\n    \"\"\"Create reduced point clouds for training/validation/testing.\n\n    Args:\n        data_path (str): Path of original data.\n        pkl_prefix (str): Prefix of info files.\n        train_info_path (str, optional): Path of training set info.\n            Default: None.\n        val_info_path (str, optional): Path of validation set info.\n            Default: None.\n        test_info_path (str, optional): Path of test set info.\n            Default: None.\n        save_path (str, optional): Path to save reduced point cloud data.\n            Default: None.\n        with_back (bool, optional): Whether to flip the points to back.\n            Default: False.\n    \"\"\"\n    if train_info_path is None:\n        train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'\n    if val_info_path is None:\n        val_info_path = Path(data_path) / f'{pkl_prefix}_infos_val.pkl'\n    if test_info_path is None:\n        test_info_path = Path(data_path) / f'{pkl_prefix}_infos_test.pkl'\n\n    print('create reduced point cloud for training set')\n    _create_reduced_point_cloud(data_path, train_info_path, save_path)\n    print('create reduced point cloud for validation set')\n    _create_reduced_point_cloud(data_path, val_info_path, save_path)\n    print('create reduced point cloud for testing set')\n    _create_reduced_point_cloud(data_path, test_info_path, save_path)\n    if with_back:\n        _create_reduced_point_cloud(\n            data_path, train_info_path, save_path, 
back=True)\n        _create_reduced_point_cloud(\n            data_path, val_info_path, save_path, back=True)\n        _create_reduced_point_cloud(\n            data_path, test_info_path, save_path, back=True)\n\n\ndef export_2d_annotation(root_path, info_path, mono3d=True):\n    \"\"\"Export 2d annotation from the info file and raw data.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        info_path (str): Path of the info file.\n        mono3d (bool, optional): Whether to export mono3d annotation.\n            Default: True.\n    \"\"\"\n    # get bbox annotations for camera\n    kitti_infos = mmcv.load(info_path)\n    cat2Ids = [\n        dict(id=kitti_categories.index(cat_name), name=cat_name)\n        for cat_name in kitti_categories\n    ]\n    coco_ann_id = 0\n    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)\n    from os import path as osp\n    for info in mmcv.track_iter_progress(kitti_infos):\n        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)\n        (height, width,\n         _) = mmcv.imread(osp.join(root_path,\n                                   info['image']['image_path'])).shape\n        coco_2d_dict['images'].append(\n            dict(\n                file_name=info['image']['image_path'],\n                id=info['image']['image_idx'],\n                Tri2v=info['calib']['Tr_imu_to_velo'],\n                Trv2c=info['calib']['Tr_velo_to_cam'],\n                rect=info['calib']['R0_rect'],\n                cam_intrinsic=info['calib']['P2'],\n                width=width,\n                height=height))\n        for coco_info in coco_infos:\n            if coco_info is None:\n                continue\n            # add an empty key for coco format\n            coco_info['segmentation'] = []\n            coco_info['id'] = coco_ann_id\n            coco_2d_dict['annotations'].append(coco_info)\n            coco_ann_id += 1\n    if mono3d:\n        json_prefix = f'{info_path[:-4]}_mono3d'\n    else:\n        json_prefix = f'{info_path[:-4]}'\n    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')\n\n\ndef get_2d_boxes(info, occluded, mono3d=True):\n    \"\"\"Get the 2D annotation records for a given info.\n\n    Args:\n        info: Information of the given sample data.\n        occluded: Integer (0, 1, 2, 3) indicating occlusion state:\n            0 = fully visible, 1 = partly occluded, 2 = largely occluded,\n            3 = unknown, -1 = DontCare\n        mono3d (bool): Whether to get boxes with mono3d annotation.\n\n    Return:\n        list[dict]: List of 2D annotation record that belongs to the input\n            `sample_data_token`.\n    \"\"\"\n    # Get calibration information\n    P2 = info['calib']['P2']\n\n    repro_recs = []\n    # if no annotations in info (test dataset), then return\n    if 'annos' not in info:\n        return repro_recs\n\n    # Get all the annotation with the specified visibilties.\n    ann_dicts = info['annos']\n    mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]\n    for k in ann_dicts.keys():\n        ann_dicts[k] = ann_dicts[k][mask]\n\n    # convert dict of list to list of dict\n    ann_recs = []\n    for i in range(len(ann_dicts['occluded'])):\n        ann_rec = {}\n        for k in ann_dicts.keys():\n            ann_rec[k] = ann_dicts[k][i]\n        ann_recs.append(ann_rec)\n\n    for ann_idx, ann_rec in enumerate(ann_recs):\n        # Augment sample_annotation with token information.\n        ann_rec['sample_annotation_token'] = \\\n            
f\"{info['image']['image_idx']}.{ann_idx}\"\n        ann_rec['sample_data_token'] = info['image']['image_idx']\n        sample_data_token = info['image']['image_idx']\n\n        loc = ann_rec['location'][np.newaxis, :]\n        dim = ann_rec['dimensions'][np.newaxis, :]\n        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]\n        # transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]\n        dst = np.array([0.5, 0.5, 0.5])\n        src = np.array([0.5, 1.0, 0.5])\n        loc = loc + dim * (dst - src)\n        offset = (info['calib']['P2'][0, 3] - info['calib']['P0'][0, 3]) \\\n            / info['calib']['P2'][0, 0]\n        loc_3d = np.copy(loc)\n        loc_3d[0, 0] += offset\n        gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)\n\n        # Filter out the corners that are not in front of the calibrated\n        # sensor.\n        corners_3d = box_np_ops.center_to_corner_box3d(\n            gt_bbox_3d[:, :3],\n            gt_bbox_3d[:, 3:6],\n            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],\n            axis=1)\n        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)\n        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()\n        corners_3d = corners_3d[:, in_front]\n\n        # Project 3d box to 2d.\n        camera_intrinsic = P2\n        corner_coords = view_points(corners_3d, camera_intrinsic,\n                                    True).T[:, :2].tolist()\n\n        # Keep only corners that fall within the image.\n        final_coords = post_process_coords(corner_coords)\n\n        # Skip if the convex hull of the re-projected corners\n        # does not intersect the image canvas.\n        if final_coords is None:\n            continue\n        else:\n            min_x, min_y, max_x, max_y = final_coords\n\n        # Generate dictionary record to be included in the .json file.\n        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,\n                                    sample_data_token,\n                                    info['image']['image_path'])\n\n        # If mono3d=True, add 3D annotations in camera coordinates\n        if mono3d and (repro_rec is not None):\n            repro_rec['bbox_cam3d'] = np.concatenate(\n                [loc_3d, dim, rot],\n                axis=1).astype(np.float32).squeeze().tolist()\n            repro_rec['velo_cam3d'] = -1  # no velocity in KITTI\n\n            center3d = np.array(loc).reshape([1, 3])\n            center2d = points_cam2img(\n                center3d, camera_intrinsic, with_depth=True)\n            repro_rec['center2d'] = center2d.squeeze().tolist()\n            # normalized center2D + depth\n            # samples with depth < 0 will be removed\n            if repro_rec['center2d'][2] <= 0:\n                continue\n\n            repro_rec['attribute_name'] = -1  # no attribute in KITTI\n            repro_rec['attribute_id'] = -1\n\n        repro_recs.append(repro_rec)\n\n    return repro_recs\n\n\ndef generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):\n    \"\"\"Generate one 2D annotation record given various information on top of\n    the 2D bounding box coordinates.\n\n    Args:\n        ann_rec (dict): Original 3d annotation record.\n        x1 (float): Minimum value of the x coordinate.\n        y1 (float): Minimum value of the y coordinate.\n        x2 (float): Maximum value of the x coordinate.\n        y2 (float): Maximum value of the y coordinate.\n        sample_data_token (str): Sample data token.\n        filename (str):The 
corresponding image file where the annotation\n            is present.\n\n    Returns:\n        dict: A sample 2D annotation record.\n            - file_name (str): file name\n            - image_id (str): sample data token\n            - area (float): 2d box area\n            - category_name (str): category name\n            - category_id (int): category id\n            - bbox (list[float]): left x, top y, x_size, y_size of 2d box\n            - iscrowd (int): whether the area is crowd\n    \"\"\"\n    repro_rec = OrderedDict()\n    repro_rec['sample_data_token'] = sample_data_token\n    coco_rec = dict()\n\n    key_mapping = {\n        'name': 'category_name',\n        'num_points_in_gt': 'num_lidar_pts',\n        'sample_annotation_token': 'sample_annotation_token',\n        'sample_data_token': 'sample_data_token',\n    }\n\n    for key, value in ann_rec.items():\n        if key in key_mapping.keys():\n            repro_rec[key_mapping[key]] = value\n\n    repro_rec['bbox_corners'] = [x1, y1, x2, y2]\n    repro_rec['filename'] = filename\n\n    coco_rec['file_name'] = filename\n    coco_rec['image_id'] = sample_data_token\n    coco_rec['area'] = (y2 - y1) * (x2 - x1)\n\n    if repro_rec['category_name'] not in kitti_categories:\n        return None\n    cat_name = repro_rec['category_name']\n    coco_rec['category_name'] = cat_name\n    coco_rec['category_id'] = kitti_categories.index(cat_name)\n    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    coco_rec['iscrowd'] = 0\n\n    return coco_rec\n"
  },
  {
    "path": "tools/data_converter/kitti_data_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom collections import OrderedDict\nfrom concurrent import futures as futures\nfrom os import path as osp\nfrom pathlib import Path\n\nimport mmcv\nimport numpy as np\nfrom PIL import Image\nfrom skimage import io\n\n\ndef get_image_index_str(img_idx, use_prefix_id=False):\n    if use_prefix_id:\n        return '{:07d}'.format(img_idx)\n    else:\n        return '{:06d}'.format(img_idx)\n\n\ndef get_kitti_info_path(idx,\n                        prefix,\n                        info_type='image_2',\n                        file_tail='.png',\n                        training=True,\n                        relative_path=True,\n                        exist_check=True,\n                        use_prefix_id=False):\n    img_idx_str = get_image_index_str(idx, use_prefix_id)\n    img_idx_str += file_tail\n    prefix = Path(prefix)\n    if training:\n        file_path = Path('training') / info_type / img_idx_str\n    else:\n        file_path = Path('testing') / info_type / img_idx_str\n    if exist_check and not (prefix / file_path).exists():\n        raise ValueError('file not exist: {}'.format(file_path))\n    if relative_path:\n        return str(file_path)\n    else:\n        return str(prefix / file_path)\n\n\ndef get_image_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   info_type='image_2',\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, info_type, '.png', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_label_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   info_type='label_2',\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, info_type, '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_plane_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   info_type='planes',\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, info_type, '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_velodyne_path(idx,\n                      prefix,\n                      training=True,\n                      relative_path=True,\n                      exist_check=True,\n                      use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'velodyne', '.bin', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_calib_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'calib', '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_pose_path(idx,\n                  prefix,\n                  training=True,\n                  relative_path=True,\n                  exist_check=True,\n                  use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'pose', '.txt', training,\n                               
relative_path, exist_check, use_prefix_id)\n\n\ndef get_timestamp_path(idx,\n                       prefix,\n                       training=True,\n                       relative_path=True,\n                       exist_check=True,\n                       use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'timestamp', '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_label_anno(label_path):\n    annotations = {}\n    annotations.update({\n        'name': [],\n        'truncated': [],\n        'occluded': [],\n        'alpha': [],\n        'bbox': [],\n        'dimensions': [],\n        'location': [],\n        'rotation_y': []\n    })\n    with open(label_path, 'r') as f:\n        lines = f.readlines()\n    # if len(lines) == 0 or len(lines[0]) < 15:\n    #     content = []\n    # else:\n    content = [line.strip().split(' ') for line in lines]\n    num_objects = len([x[0] for x in content if x[0] != 'DontCare'])\n    annotations['name'] = np.array([x[0] for x in content])\n    num_gt = len(annotations['name'])\n    annotations['truncated'] = np.array([float(x[1]) for x in content])\n    annotations['occluded'] = np.array([int(x[2]) for x in content])\n    annotations['alpha'] = np.array([float(x[3]) for x in content])\n    annotations['bbox'] = np.array([[float(info) for info in x[4:8]]\n                                    for x in content]).reshape(-1, 4)\n    # dimensions will convert hwl format to standard lhw(camera) format.\n    annotations['dimensions'] = np.array([[float(info) for info in x[8:11]]\n                                          for x in content\n                                          ]).reshape(-1, 3)[:, [2, 0, 1]]\n    annotations['location'] = np.array([[float(info) for info in x[11:14]]\n                                        for x in content]).reshape(-1, 3)\n    annotations['rotation_y'] = np.array([float(x[14])\n                                          for x in content]).reshape(-1)\n    if len(content) != 0 and len(content[0]) == 16:  # have score\n        annotations['score'] = np.array([float(x[15]) for x in content])\n    else:\n        annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))\n    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)\n    annotations['index'] = np.array(index, dtype=np.int32)\n    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)\n    return annotations\n\n\ndef _extend_matrix(mat):\n    mat = np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0)\n    return mat\n\n\ndef get_kitti_image_info(path,\n                         training=True,\n                         label_info=True,\n                         velodyne=False,\n                         calib=False,\n                         with_plane=False,\n                         image_ids=7481,\n                         extend_matrix=True,\n                         num_worker=8,\n                         relative_path=True,\n                         with_imageshape=True):\n    \"\"\"\n    KITTI annotation format version 2:\n    {\n        [optional]points: [N, 3+] point cloud\n        [optional, for kitti]image: {\n            image_idx: ...\n            image_path: ...\n            image_shape: ...\n        }\n        point_cloud: {\n            num_features: 4\n            velodyne_path: ...\n        }\n        [optional, for kitti]calib: {\n            R0_rect: ...\n            Tr_velo_to_cam: ...\n            P2: ...\n        }\n        annos: {\n            
location: [num_gt, 3] array\n            dimensions: [num_gt, 3] array\n            rotation_y: [num_gt] angle array\n            name: [num_gt] ground truth name array\n            [optional]difficulty: kitti difficulty\n            [optional]group_ids: used for multi-part object\n        }\n    }\n    \"\"\"\n    root_path = Path(path)\n    if not isinstance(image_ids, list):\n        image_ids = list(range(image_ids))\n\n    def map_func(idx):\n        info = {}\n        pc_info = {'num_features': 4}\n        calib_info = {}\n\n        image_info = {'image_idx': idx}\n        annotations = None\n        if velodyne:\n            pc_info['velodyne_path'] = get_velodyne_path(\n                idx, path, training, relative_path)\n        image_info['image_path'] = get_image_path(idx, path, training,\n                                                  relative_path)\n        if with_imageshape:\n            img_path = image_info['image_path']\n            if relative_path:\n                img_path = str(root_path / img_path)\n            image_info['image_shape'] = np.array(\n                io.imread(img_path).shape[:2], dtype=np.int32)\n        if label_info:\n            label_path = get_label_path(idx, path, training, relative_path)\n            if relative_path:\n                label_path = str(root_path / label_path)\n            annotations = get_label_anno(label_path)\n        info['image'] = image_info\n        info['point_cloud'] = pc_info\n        if calib:\n            calib_path = get_calib_path(\n                idx, path, training, relative_path=False)\n            with open(calib_path, 'r') as f:\n                lines = f.readlines()\n            P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            if extend_matrix:\n                P0 = _extend_matrix(P0)\n                P1 = _extend_matrix(P1)\n                P2 = _extend_matrix(P2)\n                P3 = _extend_matrix(P3)\n            R0_rect = np.array([\n                float(info) for info in lines[4].split(' ')[1:10]\n            ]).reshape([3, 3])\n            if extend_matrix:\n                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)\n                rect_4x4[3, 3] = 1.\n                rect_4x4[:3, :3] = R0_rect\n            else:\n                rect_4x4 = R0_rect\n\n            Tr_velo_to_cam = np.array([\n                float(info) for info in lines[5].split(' ')[1:13]\n            ]).reshape([3, 4])\n            Tr_imu_to_velo = np.array([\n                float(info) for info in lines[6].split(' ')[1:13]\n            ]).reshape([3, 4])\n            if extend_matrix:\n                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)\n                Tr_imu_to_velo = _extend_matrix(Tr_imu_to_velo)\n            calib_info['P0'] = P0\n            calib_info['P1'] = P1\n            calib_info['P2'] = P2\n            calib_info['P3'] = P3\n            calib_info['R0_rect'] = rect_4x4\n            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam\n            calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo\n            info['calib'] = calib_info\n\n        if with_plane:\n 
           plane_path = get_plane_path(idx, path, training, relative_path)\n            if relative_path:\n                plane_path = str(root_path / plane_path)\n            lines = mmcv.list_from_file(plane_path)\n            info['plane'] = np.array([float(i) for i in lines[3].split()])\n\n        if annotations is not None:\n            info['annos'] = annotations\n            add_difficulty_to_annos(info)\n        return info\n\n    with futures.ThreadPoolExecutor(num_worker) as executor:\n        image_infos = executor.map(map_func, image_ids)\n\n    return list(image_infos)\n\n\nclass WaymoInfoGatherer:\n    \"\"\"\n    Parallel version of waymo dataset information gathering.\n    Waymo annotation format version like KITTI:\n    {\n        [optional]points: [N, 3+] point cloud\n        [optional, for kitti]image: {\n            image_idx: ...\n            image_path: ...\n            image_shape: ...\n        }\n        point_cloud: {\n            num_features: 6\n            velodyne_path: ...\n        }\n        [optional, for kitti]calib: {\n            R0_rect: ...\n            Tr_velo_to_cam0: ...\n            P0: ...\n        }\n        annos: {\n            location: [num_gt, 3] array\n            dimensions: [num_gt, 3] array\n            rotation_y: [num_gt] angle array\n            name: [num_gt] ground truth name array\n            [optional]difficulty: kitti difficulty\n            [optional]group_ids: used for multi-part object\n        }\n    }\n    \"\"\"\n\n    def __init__(self,\n                 path,\n                 training=True,\n                 label_info=True,\n                 velodyne=False,\n                 calib=False,\n                 pose=False,\n                 extend_matrix=True,\n                 num_worker=8,\n                 relative_path=True,\n                 with_imageshape=True,\n                 max_sweeps=5) -> None:\n        self.path = path\n        self.training = training\n        self.label_info = label_info\n        self.velodyne = velodyne\n        self.calib = calib\n        self.pose = pose\n        self.extend_matrix = extend_matrix\n        self.num_worker = num_worker\n        self.relative_path = relative_path\n        self.with_imageshape = with_imageshape\n        self.max_sweeps = max_sweeps\n\n    def gather_single(self, idx):\n        root_path = Path(self.path)\n        info = {}\n        pc_info = {'num_features': 6}\n        calib_info = {}\n\n        image_info = {'image_idx': idx}\n        annotations = None\n        if self.velodyne:\n            pc_info['velodyne_path'] = get_velodyne_path(\n                idx,\n                self.path,\n                self.training,\n                self.relative_path,\n                use_prefix_id=True)\n            with open(\n                    get_timestamp_path(\n                        idx,\n                        self.path,\n                        self.training,\n                        relative_path=False,\n                        use_prefix_id=True)) as f:\n                info['timestamp'] = np.int64(f.read())\n        image_info['image_path'] = get_image_path(\n            idx,\n            self.path,\n            self.training,\n            self.relative_path,\n            info_type='image_0',\n            use_prefix_id=True)\n        if self.with_imageshape:\n            img_path = image_info['image_path']\n            if self.relative_path:\n                img_path = str(root_path / img_path)\n            # io using PIL is significantly faster than 
skimage\n            w, h = Image.open(img_path).size\n            image_info['image_shape'] = np.array((h, w), dtype=np.int32)\n        if self.label_info:\n            label_path = get_label_path(\n                idx,\n                self.path,\n                self.training,\n                self.relative_path,\n                info_type='label_all',\n                use_prefix_id=True)\n            if self.relative_path:\n                label_path = str(root_path / label_path)\n            annotations = get_label_anno(label_path)\n        info['image'] = image_info\n        info['point_cloud'] = pc_info\n        if self.calib:\n            calib_path = get_calib_path(\n                idx,\n                self.path,\n                self.training,\n                relative_path=False,\n                use_prefix_id=True)\n            with open(calib_path, 'r') as f:\n                lines = f.readlines()\n            P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P4 = np.array([float(info) for info in lines[4].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            if self.extend_matrix:\n                P0 = _extend_matrix(P0)\n                P1 = _extend_matrix(P1)\n                P2 = _extend_matrix(P2)\n                P3 = _extend_matrix(P3)\n                P4 = _extend_matrix(P4)\n            R0_rect = np.array([\n                float(info) for info in lines[5].split(' ')[1:10]\n            ]).reshape([3, 3])\n            if self.extend_matrix:\n                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)\n                rect_4x4[3, 3] = 1.\n                rect_4x4[:3, :3] = R0_rect\n            else:\n                rect_4x4 = R0_rect\n\n            Tr_velo_to_cam = np.array([\n                float(info) for info in lines[6].split(' ')[1:13]\n            ]).reshape([3, 4])\n            if self.extend_matrix:\n                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)\n            calib_info['P0'] = P0\n            calib_info['P1'] = P1\n            calib_info['P2'] = P2\n            calib_info['P3'] = P3\n            calib_info['P4'] = P4\n            calib_info['R0_rect'] = rect_4x4\n            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam\n            info['calib'] = calib_info\n        if self.pose:\n            pose_path = get_pose_path(\n                idx,\n                self.path,\n                self.training,\n                relative_path=False,\n                use_prefix_id=True)\n            info['pose'] = np.loadtxt(pose_path)\n\n        if annotations is not None:\n            info['annos'] = annotations\n            info['annos']['camera_id'] = info['annos'].pop('score')\n            add_difficulty_to_annos(info)\n\n        sweeps = []\n        prev_idx = idx\n        while len(sweeps) < self.max_sweeps:\n            prev_info = {}\n            prev_idx -= 1\n            prev_info['velodyne_path'] = get_velodyne_path(\n                prev_idx,\n                self.path,\n                self.training,\n                self.relative_path,\n                
exist_check=False,\n                use_prefix_id=True)\n            if_prev_exists = osp.exists(\n                Path(self.path) / prev_info['velodyne_path'])\n            if if_prev_exists:\n                with open(\n                        get_timestamp_path(\n                            prev_idx,\n                            self.path,\n                            self.training,\n                            relative_path=False,\n                            use_prefix_id=True)) as f:\n                    prev_info['timestamp'] = np.int64(f.read())\n                prev_pose_path = get_pose_path(\n                    prev_idx,\n                    self.path,\n                    self.training,\n                    relative_path=False,\n                    use_prefix_id=True)\n                prev_info['pose'] = np.loadtxt(prev_pose_path)\n                sweeps.append(prev_info)\n            else:\n                break\n        info['sweeps'] = sweeps\n\n        return info\n\n    def gather(self, image_ids):\n        if not isinstance(image_ids, list):\n            image_ids = list(range(image_ids))\n        image_infos = mmcv.track_parallel_progress(self.gather_single,\n                                                   image_ids, self.num_worker)\n        return list(image_infos)\n\n\ndef kitti_anno_to_label_file(annos, folder):\n    folder = Path(folder)\n    for anno in annos:\n        image_idx = anno['metadata']['image_idx']\n        label_lines = []\n        for j in range(anno['bbox'].shape[0]):\n            label_dict = {\n                'name': anno['name'][j],\n                'alpha': anno['alpha'][j],\n                'bbox': anno['bbox'][j],\n                'location': anno['location'][j],\n                'dimensions': anno['dimensions'][j],\n                'rotation_y': anno['rotation_y'][j],\n                'score': anno['score'][j],\n            }\n            label_line = kitti_result_line(label_dict)\n            label_lines.append(label_line)\n        label_file = folder / f'{get_image_index_str(image_idx)}.txt'\n        label_str = '\\n'.join(label_lines)\n        with open(label_file, 'w') as f:\n            f.write(label_str)\n\n\ndef add_difficulty_to_annos(info):\n    min_height = [40, 25,\n                  25]  # minimum height for evaluated groundtruth/detections\n    max_occlusion = [\n        0, 1, 2\n    ]  # maximum occlusion level of the groundtruth used for evaluation\n    max_trunc = [\n        0.15, 0.3, 0.5\n    ]  # maximum truncation level of the groundtruth used for evaluation\n    annos = info['annos']\n    dims = annos['dimensions']  # lhw format\n    bbox = annos['bbox']\n    height = bbox[:, 3] - bbox[:, 1]\n    occlusion = annos['occluded']\n    truncation = annos['truncated']\n    diff = []\n    easy_mask = np.ones((len(dims), ), dtype=np.bool)\n    moderate_mask = np.ones((len(dims), ), dtype=np.bool)\n    hard_mask = np.ones((len(dims), ), dtype=np.bool)\n    i = 0\n    for h, o, t in zip(height, occlusion, truncation):\n        if o > max_occlusion[0] or h <= min_height[0] or t > max_trunc[0]:\n            easy_mask[i] = False\n        if o > max_occlusion[1] or h <= min_height[1] or t > max_trunc[1]:\n            moderate_mask[i] = False\n        if o > max_occlusion[2] or h <= min_height[2] or t > max_trunc[2]:\n            hard_mask[i] = False\n        i += 1\n    is_easy = easy_mask\n    is_moderate = np.logical_xor(easy_mask, moderate_mask)\n    is_hard = np.logical_xor(hard_mask, moderate_mask)\n\n    for i in 
range(len(dims)):\n        if is_easy[i]:\n            diff.append(0)\n        elif is_moderate[i]:\n            diff.append(1)\n        elif is_hard[i]:\n            diff.append(2)\n        else:\n            diff.append(-1)\n    annos['difficulty'] = np.array(diff, np.int32)\n    return diff\n\n\ndef kitti_result_line(result_dict, precision=4):\n    prec_float = '{' + ':.{}f'.format(precision) + '}'\n    res_line = []\n    all_field_default = OrderedDict([\n        ('name', None),\n        ('truncated', -1),\n        ('occluded', -1),\n        ('alpha', -10),\n        ('bbox', None),\n        ('dimensions', [-1, -1, -1]),\n        ('location', [-1000, -1000, -1000]),\n        ('rotation_y', -10),\n        ('score', 0.0),\n    ])\n    res_dict = [(key, None) for key, val in all_field_default.items()]\n    res_dict = OrderedDict(res_dict)\n    for key, val in result_dict.items():\n        if all_field_default[key] is None and val is None:\n            raise ValueError('you must specify a value for {}'.format(key))\n        res_dict[key] = val\n\n    for key, val in res_dict.items():\n        if key == 'name':\n            res_line.append(val)\n        elif key in ['truncated', 'alpha', 'rotation_y', 'score']:\n            if val is None:\n                res_line.append(str(all_field_default[key]))\n            else:\n                res_line.append(prec_float.format(val))\n        elif key == 'occluded':\n            if val is None:\n                res_line.append(str(all_field_default[key]))\n            else:\n                res_line.append('{}'.format(val))\n        elif key in ['bbox', 'dimensions', 'location']:\n            if val is None:\n                res_line += [str(v) for v in all_field_default[key]]\n            else:\n                res_line += [prec_float.format(v) for v in val]\n        else:\n            raise ValueError('unknown key. supported key:{}'.format(\n                res_dict.keys()))\n    return ' '.join(res_line)\n"
  },
  {
    "path": "tools/data_converter/lyft_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nfrom logging import warning\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft\nfrom pyquaternion import Quaternion\n\nfrom mmdet3d.datasets import LyftDataset\nfrom .nuscenes_converter import (get_2d_boxes, get_available_scenes,\n                                 obtain_sensor2top)\n\nlyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',\n                   'motorcycle', 'bicycle', 'pedestrian', 'animal')\n\n\ndef create_lyft_infos(root_path,\n                      info_prefix,\n                      version='v1.01-train',\n                      max_sweeps=10):\n    \"\"\"Create info file of lyft dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        version (str, optional): Version of the data.\n            Default: 'v1.01-train'.\n        max_sweeps (int, optional): Max number of sweeps.\n            Default: 10.\n    \"\"\"\n    lyft = Lyft(\n        data_path=osp.join(root_path, version),\n        json_path=osp.join(root_path, version, version),\n        verbose=True)\n    available_vers = ['v1.01-train', 'v1.01-test']\n    assert version in available_vers\n    if version == 'v1.01-train':\n        train_scenes = mmcv.list_from_file('data/lyft/train.txt')\n        val_scenes = mmcv.list_from_file('data/lyft/val.txt')\n    elif version == 'v1.01-test':\n        train_scenes = mmcv.list_from_file('data/lyft/test.txt')\n        val_scenes = []\n    else:\n        raise ValueError('unknown')\n\n    # filter existing scenes.\n    available_scenes = get_available_scenes(lyft)\n    available_scene_names = [s['name'] for s in available_scenes]\n    train_scenes = list(\n        filter(lambda x: x in available_scene_names, train_scenes))\n    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))\n    train_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in train_scenes\n    ])\n    val_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in val_scenes\n    ])\n\n    test = 'test' in version\n    if test:\n        print(f'test scene: {len(train_scenes)}')\n    else:\n        print(f'train scene: {len(train_scenes)}, \\\n                val scene: {len(val_scenes)}')\n    train_lyft_infos, val_lyft_infos = _fill_trainval_infos(\n        lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps)\n\n    metadata = dict(version=version)\n    if test:\n        print(f'test sample: {len(train_lyft_infos)}')\n        data = dict(infos=train_lyft_infos, metadata=metadata)\n        info_name = f'{info_prefix}_infos_test'\n        info_path = osp.join(root_path, f'{info_name}.pkl')\n        mmcv.dump(data, info_path)\n    else:\n        print(f'train sample: {len(train_lyft_infos)}, \\\n                val sample: {len(val_lyft_infos)}')\n        data = dict(infos=train_lyft_infos, metadata=metadata)\n        train_info_name = f'{info_prefix}_infos_train'\n        info_path = osp.join(root_path, f'{train_info_name}.pkl')\n        mmcv.dump(data, info_path)\n        data['infos'] = val_lyft_infos\n        val_info_name = f'{info_prefix}_infos_val'\n        info_val_path = osp.join(root_path, f'{val_info_name}.pkl')\n        mmcv.dump(data, 
info_val_path)\n\n\ndef _fill_trainval_infos(lyft,\n                         train_scenes,\n                         val_scenes,\n                         test=False,\n                         max_sweeps=10):\n    \"\"\"Generate the train/val infos from the raw data.\n\n    Args:\n        lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.\n        train_scenes (list[str]): Basic information of training scenes.\n        val_scenes (list[str]): Basic information of validation scenes.\n        test (bool, optional): Whether use the test mode. In the test mode, no\n            annotations can be accessed. Default: False.\n        max_sweeps (int, optional): Max number of sweeps. Default: 10.\n\n    Returns:\n        tuple[list[dict]]: Information of training set and\n            validation set that will be saved to the info file.\n    \"\"\"\n    train_lyft_infos = []\n    val_lyft_infos = []\n\n    for sample in mmcv.track_iter_progress(lyft.sample):\n        lidar_token = sample['data']['LIDAR_TOP']\n        sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])\n        cs_record = lyft.get('calibrated_sensor',\n                             sd_rec['calibrated_sensor_token'])\n        pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token'])\n        abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token)\n        # nuScenes devkit returns more convenient relative paths while\n        # lyft devkit returns absolute paths\n        abs_lidar_path = str(abs_lidar_path)  # absolute path\n        lidar_path = abs_lidar_path.split(f'{os.getcwd()}/')[-1]\n        # relative path\n\n        mmcv.check_file_exist(lidar_path)\n\n        info = {\n            'lidar_path': lidar_path,\n            'token': sample['token'],\n            'sweeps': [],\n            'cams': dict(),\n            'lidar2ego_translation': cs_record['translation'],\n            'lidar2ego_rotation': cs_record['rotation'],\n            'ego2global_translation': pose_record['translation'],\n            'ego2global_rotation': pose_record['rotation'],\n            'timestamp': sample['timestamp'],\n        }\n\n        l2e_r = info['lidar2ego_rotation']\n        l2e_t = info['lidar2ego_translation']\n        e2g_r = info['ego2global_rotation']\n        e2g_t = info['ego2global_translation']\n        l2e_r_mat = Quaternion(l2e_r).rotation_matrix\n        e2g_r_mat = Quaternion(e2g_r).rotation_matrix\n\n        # obtain 6 image's information per frame\n        camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n        ]\n        for cam in camera_types:\n            cam_token = sample['data'][cam]\n            cam_path, _, cam_intrinsic = lyft.get_sample_data(cam_token)\n            cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat,\n                                         e2g_t, e2g_r_mat, cam)\n            cam_info.update(cam_intrinsic=cam_intrinsic)\n            info['cams'].update({cam: cam_info})\n\n        # obtain sweeps for a single key-frame\n        sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])\n        sweeps = []\n        while len(sweeps) < max_sweeps:\n            if not sd_rec['prev'] == '':\n                sweep = obtain_sensor2top(lyft, sd_rec['prev'], l2e_t,\n                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')\n                sweeps.append(sweep)\n                sd_rec = 
lyft.get('sample_data', sd_rec['prev'])\n            else:\n                break\n        info['sweeps'] = sweeps\n        # obtain annotation\n        if not test:\n            annotations = [\n                lyft.get('sample_annotation', token)\n                for token in sample['anns']\n            ]\n            locs = np.array([b.center for b in boxes]).reshape(-1, 3)\n            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)\n            rots = np.array([b.orientation.yaw_pitch_roll[0]\n                             for b in boxes]).reshape(-1, 1)\n\n            names = [b.name for b in boxes]\n            for i in range(len(names)):\n                if names[i] in LyftDataset.NameMapping:\n                    names[i] = LyftDataset.NameMapping[names[i]]\n            names = np.array(names)\n\n            # we need to convert box size to\n            # the format of our lidar coordinate system\n            # which is x_size, y_size, z_size (corresponding to l, w, h)\n            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)\n            assert len(gt_boxes) == len(\n                annotations), f'{len(gt_boxes)}, {len(annotations)}'\n            info['gt_boxes'] = gt_boxes\n            info['gt_names'] = names\n            info['num_lidar_pts'] = np.array(\n                [a['num_lidar_pts'] for a in annotations])\n            info['num_radar_pts'] = np.array(\n                [a['num_radar_pts'] for a in annotations])\n\n        if sample['scene_token'] in train_scenes:\n            train_lyft_infos.append(info)\n        else:\n            val_lyft_infos.append(info)\n\n    return train_lyft_infos, val_lyft_infos\n\n\ndef export_2d_annotation(root_path, info_path, version):\n    \"\"\"Export 2d annotation from the info file and raw data.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        info_path (str): Path of the info file.\n        version (str): Dataset version.\n    \"\"\"\n    warning('DeprecationWarning: 2D annotations are not used on the '\n                 'Lyft dataset. 
The function export_2d_annotation will be '\n                 'deprecated.')\n    # get bbox annotations for camera\n    camera_types = [\n        'CAM_FRONT',\n        'CAM_FRONT_RIGHT',\n        'CAM_FRONT_LEFT',\n        'CAM_BACK',\n        'CAM_BACK_LEFT',\n        'CAM_BACK_RIGHT',\n    ]\n    lyft_infos = mmcv.load(info_path)['infos']\n    lyft = Lyft(\n        data_path=osp.join(root_path, version),\n        json_path=osp.join(root_path, version, version),\n        verbose=True)\n    # info_2d_list = []\n    cat2Ids = [\n        dict(id=lyft_categories.index(cat_name), name=cat_name)\n        for cat_name in lyft_categories\n    ]\n    coco_ann_id = 0\n    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)\n    for info in mmcv.track_iter_progress(lyft_infos):\n        for cam in camera_types:\n            cam_info = info['cams'][cam]\n            coco_infos = get_2d_boxes(\n                lyft,\n                cam_info['sample_data_token'],\n                visibilities=['', '1', '2', '3', '4'])\n            (height, width, _) = mmcv.imread(cam_info['data_path']).shape\n            coco_2d_dict['images'].append(\n                dict(\n                    file_name=cam_info['data_path'],\n                    id=cam_info['sample_data_token'],\n                    width=width,\n                    height=height))\n            for coco_info in coco_infos:\n                if coco_info is None:\n                    continue\n                # add an empty key for coco format\n                coco_info['segmentation'] = []\n                coco_info['id'] = coco_ann_id\n                coco_2d_dict['annotations'].append(coco_info)\n                coco_ann_id += 1\n    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')\n"
  },
  {
    "path": "tools/data_converter/lyft_data_fixer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\n\nimport numpy as np\n\n\ndef fix_lyft(root_folder='./data/lyft', version='v1.01'):\n    # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000  # noqa\n    lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'\n    root_folder = os.path.join(root_folder, f'{version}-train')\n    lidar_path = os.path.join(root_folder, lidar_path)\n    assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \\\n        f'dataset and make sure {lidar_path} is present.'\n    points = np.fromfile(lidar_path, dtype=np.float32, count=-1)\n    try:\n        points.reshape([-1, 5])\n        print(f'This fix is not required for version {version}.')\n    except ValueError:\n        new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')\n        new_points.tofile(lidar_path)\n        print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')\n\n\nparser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')\nparser.add_argument(\n    '--root-folder',\n    type=str,\n    default='./data/lyft',\n    help='specify the root path of Lyft dataset')\nparser.add_argument(\n    '--version',\n    type=str,\n    default='v1.01',\n    help='specify Lyft dataset version')\nargs = parser.parse_args()\n\nif __name__ == '__main__':\n    fix_lyft(root_folder=args.root_folder, version=args.version)\n"
  },
  {
    "path": "tools/data_converter/nuimage_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport base64\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom nuimages import NuImages\nfrom nuimages.utils.utils import mask_decode, name_to_index_mapping\n\nnus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n                  'barrier')\n\nNAME_MAPPING = {\n    'movable_object.barrier': 'barrier',\n    'vehicle.bicycle': 'bicycle',\n    'vehicle.bus.bendy': 'bus',\n    'vehicle.bus.rigid': 'bus',\n    'vehicle.car': 'car',\n    'vehicle.construction': 'construction_vehicle',\n    'vehicle.motorcycle': 'motorcycle',\n    'human.pedestrian.adult': 'pedestrian',\n    'human.pedestrian.child': 'pedestrian',\n    'human.pedestrian.construction_worker': 'pedestrian',\n    'human.pedestrian.police_officer': 'pedestrian',\n    'movable_object.trafficcone': 'traffic_cone',\n    'vehicle.trailer': 'trailer',\n    'vehicle.truck': 'truck',\n}\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Data converter arg parser')\n    parser.add_argument(\n        '--data-root',\n        type=str,\n        default='./data/nuimages',\n        help='specify the root path of dataset')\n    parser.add_argument(\n        '--version',\n        type=str,\n        nargs='+',\n        default=['v1.0-mini'],\n        required=False,\n        help='specify the dataset version')\n    parser.add_argument(\n        '--out-dir',\n        type=str,\n        default='./data/nuimages/annotations/',\n        required=False,\n        help='path to save the exported json')\n    parser.add_argument(\n        '--nproc',\n        type=int,\n        default=4,\n        required=False,\n        help='workers to process semantic masks')\n    parser.add_argument('--extra-tag', type=str, default='nuimages')\n    args = parser.parse_args()\n    return args\n\n\ndef get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root):\n    \"\"\"Get semantic segmentation map for an image.\n\n    Args:\n        nuim (obj:`NuImages`): NuImages dataset object\n        img_info (dict): Meta information of img\n\n    Returns:\n        np.ndarray: Semantic segmentation map of the image\n    \"\"\"\n    sd_token = img_info['token']\n    image_id = img_info['id']\n    name_to_index = name_to_index_mapping(nuim.category)\n\n    # Get image data.\n    width, height = img_info['width'], img_info['height']\n    semseg_mask = np.zeros((height, width)).astype('uint8')\n\n    # Load stuff / surface regions.\n    surface_anns = [\n        o for o in nuim.surface_ann if o['sample_data_token'] == sd_token\n    ]\n\n    # Draw stuff / surface regions.\n    for ann in surface_anns:\n        # Get color and mask.\n        category_token = ann['category_token']\n        category_name = nuim.get('category', category_token)['name']\n        if ann['mask'] is None:\n            continue\n        mask = mask_decode(ann['mask'])\n\n        # Draw mask for semantic segmentation.\n        semseg_mask[mask == 1] = name_to_index[category_name]\n\n    # Load object instances.\n    object_anns = [\n        o for o in nuim.object_ann if o['sample_data_token'] == sd_token\n    ]\n\n    # Sort by token to ensure that objects always appear in the\n    # instance mask in the same order.\n    object_anns = sorted(object_anns, key=lambda k: k['token'])\n\n    # Draw object instances.\n    # The 0 index is reserved for background; thus, the instances\n    # 
should start from index 1.\n    annotations = []\n    for i, ann in enumerate(object_anns, start=1):\n        # Get color, box, mask and name.\n        category_token = ann['category_token']\n        category_name = nuim.get('category', category_token)['name']\n        if ann['mask'] is None:\n            continue\n        mask = mask_decode(ann['mask'])\n\n        # Draw masks for semantic segmentation and instance segmentation.\n        semseg_mask[mask == 1] = name_to_index[category_name]\n\n        if category_name in NAME_MAPPING:\n            cat_name = NAME_MAPPING[category_name]\n            cat_id = cat2id[cat_name]\n\n            x_min, y_min, x_max, y_max = ann['bbox']\n            # encode calibrated instance mask\n            mask_anno = dict()\n            mask_anno['counts'] = base64.b64decode(\n                ann['mask']['counts']).decode()\n            mask_anno['size'] = ann['mask']['size']\n\n            data_anno = dict(\n                image_id=image_id,\n                category_id=cat_id,\n                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],\n                area=(x_max - x_min) * (y_max - y_min),\n                segmentation=mask_anno,\n                iscrowd=0)\n            annotations.append(data_anno)\n\n    # after process, save semantic masks\n    img_filename = img_info['file_name']\n    seg_filename = img_filename.replace('jpg', 'png')\n    seg_filename = osp.join(seg_root, seg_filename)\n    mmcv.imwrite(semseg_mask, seg_filename)\n    return annotations, np.max(semseg_mask)\n\n\ndef export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc):\n    print('Process category information')\n    categories = []\n    categories = [\n        dict(id=nus_categories.index(cat_name), name=cat_name)\n        for cat_name in nus_categories\n    ]\n    cat2id = {k_v['name']: k_v['id'] for k_v in categories}\n\n    images = []\n    print('Process image meta information...')\n    for sample_info in mmcv.track_iter_progress(nuim.sample_data):\n        if sample_info['is_key_frame']:\n            img_idx = len(images)\n            images.append(\n                dict(\n                    id=img_idx,\n                    token=sample_info['token'],\n                    file_name=sample_info['filename'],\n                    width=sample_info['width'],\n                    height=sample_info['height']))\n\n    seg_root = f'{out_dir}semantic_masks'\n    mmcv.mkdir_or_exist(seg_root)\n    mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated'))\n\n    global process_img_anno\n\n    def process_img_anno(img_info):\n        single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id,\n                                                     out_dir, data_root,\n                                                     seg_root)\n        return single_img_annos, max_cls_id\n\n    print('Process img annotations...')\n    if nproc > 1:\n        outputs = mmcv.track_parallel_progress(\n            process_img_anno, images, nproc=nproc)\n    else:\n        outputs = []\n        for img_info in mmcv.track_iter_progress(images):\n            outputs.append(process_img_anno(img_info))\n\n    # Determine the index of object annotation\n    print('Process annotation information...')\n    annotations = []\n    max_cls_ids = []\n    for single_img_annos, max_cls_id in outputs:\n        max_cls_ids.append(max_cls_id)\n        for img_anno in single_img_annos:\n            img_anno.update(id=len(annotations))\n            annotations.append(img_anno)\n\n    
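# report the largest semantic-mask class index observed across all images\n    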
max_cls_id = max(max_cls_ids)\n    print(f'Max ID of class in the semantic map: {max_cls_id}')\n\n    coco_format_json = dict(\n        images=images, annotations=annotations, categories=categories)\n\n    mmcv.mkdir_or_exist(out_dir)\n    out_file = osp.join(out_dir, f'{extra_tag}_{version}.json')\n    print(f'Annotation dumped to {out_file}')\n    mmcv.dump(coco_format_json, out_file)\n\n\ndef main():\n    args = parse_args()\n    for version in args.version:\n        nuim = NuImages(\n            dataroot=args.data_root, version=version, verbose=True, lazy=True)\n        export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag,\n                            version, args.nproc)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/data_converter/nuscenes_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nfrom collections import OrderedDict\nfrom os import path as osp\nfrom typing import List, Tuple, Union\n\nimport mmcv\nimport numpy as np\nfrom nuscenes.nuscenes import NuScenes\nfrom nuscenes.utils.geometry_utils import view_points\nfrom nuscenes.prediction import PredictHelper\nfrom pyquaternion import Quaternion\nfrom shapely.geometry import MultiPoint, box\nfrom nuscenes.utils.geometry_utils import transform_matrix\nimport math\nfrom mmdet3d.core.bbox import points_cam2img\nfrom mmdet3d.datasets import NuScenesDataset\nfrom tqdm import tqdm \nimport multiprocessing\nimport copy\nfrom multiprocessing import Manager\nfrom data_converter.nuscenes_prediction_tools import get_forecasting_annotations\nfrom nuscenes.utils.data_classes import Box\nnus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n                  'barrier')\n\nnus_attributes = ('cycle.with_rider', 'cycle.without_rider',\n                  'pedestrian.moving', 'pedestrian.standing',\n                  'pedestrian.sitting_lying_down', 'vehicle.moving',\n                  'vehicle.parked', 'vehicle.stopped', 'None')\nego_width, ego_length = 1.85, 4.084\n\ndef quart_to_rpy(qua):\n    x, y, z, w = qua\n    roll = math.atan2(2 * (w * x + y * z), 1 - 2 * (x * x + y * y))\n    pitch = math.asin(2 * (w * y - x * z))\n    yaw = math.atan2(2 * (w * z + x * y), 1 - 2 * (z * z + y * y))\n    return roll, pitch, yaw\n\ndef locate_message(utimes, utime):\n    i = np.searchsorted(utimes, utime)\n    if i == len(utimes) or (i > 0 and utime - utimes[i-1] < utimes[i] - utime):\n        i -= 1\n    return i\n\ndef create_nuscenes_infos(root_path,\n                          info_prefix,\n                          version='v1.0-trainval',\n                          max_sweeps=10):\n    \"\"\"Create info file of nuscene dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        version (str, optional): Version of the data.\n            Default: 'v1.0-trainval'.\n        max_sweeps (int, optional): Max number of sweeps.\n            Default: 10.\n    \"\"\"\n    from nuscenes.nuscenes import NuScenes\n    from nuscenes.can_bus.can_bus_api import NuScenesCanBus\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    nusc_can_bus = NuScenesCanBus(dataroot=root_path)\n    from nuscenes.utils import splits\n    available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']\n    assert version in available_vers\n    if version == 'v1.0-trainval':\n        train_scenes = splits.train\n        val_scenes = splits.val\n    elif version == 'v1.0-test':\n        train_scenes = splits.test\n        val_scenes = []\n    elif version == 'v1.0-mini':\n        train_scenes = splits.mini_train\n        val_scenes = splits.mini_val\n    else:\n        raise ValueError('unknown')\n\n    # filter existing scenes.\n    available_scenes = get_available_scenes(nusc)\n    available_scene_names = [s['name'] for s in available_scenes]\n    train_scenes = list(\n        filter(lambda x: x in available_scene_names, train_scenes))\n    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))\n    train_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in 
train_scenes\n    ])\n    val_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in val_scenes\n    ])\n\n    test = 'test' in version\n    if test:\n        print('test scene: {}'.format(len(train_scenes)))\n    else:\n        print('train scene: {}, val scene: {}'.format(\n            len(train_scenes), len(val_scenes)))\n    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(\n        nusc, nusc_can_bus, train_scenes, val_scenes, test, max_sweeps=max_sweeps)\n\n    metadata = dict(version=version)\n    if test:\n        print('test sample: {}'.format(len(train_nusc_infos)))\n        data = dict(infos=train_nusc_infos, metadata=metadata)\n        info_path = osp.join(root_path,\n                             '{}_infos_test.pkl'.format(info_prefix))\n        mmcv.dump(data, info_path)\n    else:\n        print('train sample: {}, val sample: {}'.format(\n            len(train_nusc_infos), len(val_nusc_infos)))\n        data = dict(infos=train_nusc_infos, metadata=metadata)\n        info_path = osp.join(root_path,\n                             '{}_infos_train.pkl'.format(info_prefix))\n        mmcv.dump(data, info_path)\n        data['infos'] = val_nusc_infos\n        info_val_path = osp.join(root_path,\n                                 '{}_infos_val.pkl'.format(info_prefix))\n        mmcv.dump(data, info_val_path)\n\n\ndef get_available_scenes(nusc):\n    \"\"\"Get available scenes from the input nuscenes class.\n\n    Given the raw data, get the information of available scenes for\n    further info generation.\n\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n\n    Returns:\n        available_scenes (list[dict]): List of basic information for the\n            available scenes.\n    \"\"\"\n    available_scenes = []\n    print('total scene num: {}'.format(len(nusc.scene)))\n    for scene in nusc.scene:\n        scene_token = scene['token']\n        scene_rec = nusc.get('scene', scene_token)\n        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])\n        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])\n        has_more_frames = True\n        scene_not_exist = False\n        while has_more_frames:\n            lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])\n            lidar_path = str(lidar_path)\n            if os.getcwd() in lidar_path:\n                # path from lyftdataset is absolute path\n                lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]\n                # relative path\n            if not mmcv.is_filepath(lidar_path):\n                scene_not_exist = True\n                break\n            else:\n                break\n        if scene_not_exist:\n            continue\n        available_scenes.append(scene)\n    print('exist scene num: {}'.format(len(available_scenes)))\n    return available_scenes\n\ndef _get_future_traj_info(nusc, sample, predict_steps=8, in_agent_frame=False):\n    sample_token = sample['token']\n    ann_tokens = np.array(sample['anns'])\n    sd_rec = nusc.get('sample', sample_token)\n    fut_traj_all = []\n    fut_traj_valid_mask_all = []\n    _, boxes, _ = nusc.get_sample_data(sd_rec['data']['LIDAR_TOP'], selected_anntokens=ann_tokens)\n    predict_helper = PredictHelper(nusc)\n    for i, ann_token in enumerate(ann_tokens):\n        box = boxes[i]\n        instance_token = nusc.get('sample_annotation', ann_token)['instance_token']\n        fut_traj_local = predict_helper.get_future_for_agent(instance_token,\n      
                                                       sample_token,\n                                                             seconds=predict_steps//2,\n                                                             in_agent_frame=in_agent_frame)\n\n        fut_traj = np.zeros((predict_steps, 2))\n        fut_traj_valid_mask = np.zeros((predict_steps, 2))\n        if fut_traj_local.shape[0] > 0:\n            # trans = box.center\n            # trans = np.array([0, 0, 0])\n            # rot = Quaternion(matrix=box.rotation_matrix)\n            # fut_traj_scence_centric = convert_local_coords_to_global(fut_traj_local, trans, rot)  \n            fut_traj_scence_centric = fut_traj_local\n            fut_traj[:fut_traj_scence_centric.shape[0], :] = fut_traj_scence_centric\n            fut_traj_valid_mask[:fut_traj_scence_centric.shape[0], :] = 1\n        fut_traj_all.append(fut_traj)\n        fut_traj_valid_mask_all.append(fut_traj_valid_mask)\n    if len(ann_tokens) > 0:\n        fut_traj_all = np.stack(fut_traj_all, axis=0)\n        fut_traj_valid_mask_all = np.stack(fut_traj_valid_mask_all, axis=0)\n    else:\n        fut_traj_all = np.zeros((0, predict_steps, 2))\n        fut_traj_valid_mask_all = np.zeros((0, predict_steps, 2))\n    return fut_traj_all, fut_traj_valid_mask_all\n\ndef _get_can_bus_info(nusc, nusc_can_bus, sample):\n    scene_name = nusc.get('scene', sample['scene_token'])['name']\n    sample_timestamp = sample['timestamp']\n    try:\n        pose_list = nusc_can_bus.get_messages(scene_name, 'pose')\n    except:\n        return np.zeros(18)  # server scenes do not have can bus information.\n    can_bus = []\n    # during each scene, the first timestamp of can_bus may be large than the first sample's timestamp\n    last_pose = pose_list[0]\n    for i, pose in enumerate(pose_list):\n        if pose['utime'] > sample_timestamp:\n            break\n        last_pose = pose\n    _ = last_pose.pop('utime')  # useless\n    pos = last_pose.pop('pos')\n    rotation = last_pose.pop('orientation')\n    can_bus.extend(pos)\n    can_bus.extend(rotation)\n    for key in last_pose.keys():\n        can_bus.extend(pose[key])  # 16 elements\n    can_bus.extend([0., 0.])\n    return np.array(can_bus)\n\ndef _fill_trainval_infos(nusc,\n                         nusc_can_bus,\n                         train_scenes,\n                         val_scenes,\n                         test=False,\n                         max_sweeps=10,\n                         forecasting=False,\n                         forecasting_length=13,\n                         his_ts=2,\n                         fut_ts=6,\n                         ):\n    \"\"\"Generate the train/val infos from the raw data.\n\n    Args:\n        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.\n        train_scenes (list[str]): Basic information of training scenes.\n        val_scenes (list[str]): Basic information of validation scenes.\n        test (bool, optional): Whether use the test mode. In test mode, no\n            annotations can be accessed. Default: False.\n        max_sweeps (int, optional): Max number of sweeps. 
Default: 10.\n\n    Returns:\n        tuple[list[dict]]: Information of training set and validation set\n            that will be saved to the info file.\n    \"\"\"\n    train_nusc_infos = []\n    val_nusc_infos = []\n    frame_idx = 0\n    cat2idx = {}\n    for i, name in enumerate(nus_categories):\n        cat2idx[name] = i\n\n    for sample in mmcv.track_iter_progress(nusc.sample):\n        lidar_token = sample['data']['LIDAR_TOP']\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        cs_record = nusc.get('calibrated_sensor',\n                             sd_rec['calibrated_sensor_token'])\n        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n        if sample['prev'] != '':\n            sample_prev = nusc.get('sample', sample['prev'])\n            sd_rec_prev = nusc.get('sample_data', sample_prev['data']['LIDAR_TOP'])\n            pose_record_prev = nusc.get('ego_pose', sd_rec_prev['ego_pose_token'])\n        else:\n            pose_record_prev = None\n        if sample['next'] != '':\n            sample_next = nusc.get('sample', sample['next'])\n            sd_rec_next = nusc.get('sample_data', sample_next['data']['LIDAR_TOP'])\n            pose_record_next = nusc.get('ego_pose', sd_rec_next['ego_pose_token'])\n        else:\n            pose_record_next = None\n        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)\n\n        mmcv.check_file_exist(lidar_path)\n        can_bus = _get_can_bus_info(nusc, nusc_can_bus, sample)\n        info = {\n            'lidar_path': lidar_path,\n            'token': sample['token'],\n            'prev': sample['prev'],\n            'next': sample['next'],\n            'can_bus': can_bus,\n            'sweeps': [],\n            'frame_idx': frame_idx,\n            'cams': dict(),\n            'scene_token': sample['scene_token'],\n            'lidar2ego_translation': cs_record['translation'],\n            'lidar2ego_rotation': cs_record['rotation'],\n            'ego2global_translation': pose_record['translation'],\n            'ego2global_rotation': pose_record['rotation'],\n            'timestamp': sample['timestamp'],\n        }\n\n        l2e_r = info['lidar2ego_rotation']\n        l2e_t = info['lidar2ego_translation']\n        e2g_r = info['ego2global_rotation']\n        e2g_t = info['ego2global_translation']\n        l2e_r_mat = Quaternion(l2e_r).rotation_matrix\n        e2g_r_mat = Quaternion(e2g_r).rotation_matrix\n\n        if sample['next'] == '':\n            frame_idx = 0\n        else:\n            frame_idx += 1\n\n        # obtain 6 image's information per frame\n        camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n        ]\n        for cam in camera_types:\n            cam_token = sample['data'][cam]\n            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)\n            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,\n                                         e2g_t, e2g_r_mat, cam)\n            cam_info.update(cam_intrinsic=cam_intrinsic)\n            info['cams'].update({cam: cam_info})\n\n        # obtain sweeps for a single key-frame\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        sweeps = []\n        while len(sweeps) < max_sweeps:\n            if not sd_rec['prev'] == '':\n                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,\n                          
                l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')\n                sweeps.append(sweep)\n                sd_rec = nusc.get('sample_data', sd_rec['prev'])\n            else:\n                break\n        info['sweeps'] = sweeps\n        # obtain annotation\n\n        if not test:\n            annotations = [\n                nusc.get('sample_annotation', token)\n                for token in sample['anns']\n            ]\n            locs = np.array([b.center for b in boxes]).reshape(-1, 3)\n            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)\n            rots = np.array([b.orientation.yaw_pitch_roll[0]\n                             for b in boxes]).reshape(-1, 1)\n            velocity = np.array(\n                [nusc.box_velocity(token)[:2] for token in sample['anns']])\n            valid_flag = np.array(\n                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0\n                 for anno in annotations],\n                dtype=bool).reshape(-1)\n  \n            # add instance_ids\n            instance_inds = [nusc.getind('instance', ann['instance_token']) for ann in annotations]\n\n            future_traj_all, future_traj_valid_mask_all = _get_future_traj_info(nusc, sample, in_agent_frame=False)\n            # from IPython import embed\n            # embed()\n            # exit()\n            # future_traj_all_rel, future_traj_valid_mask_all = _get_future_traj_info(nusc, sample, in_agent_frame=True)\n\n            instance_tokens = [ann['instance_token'] for ann in annotations]  # dtype('<U[length_of_str]')\n            # TODO: Add traj in next dataset_version\n            # future_traj_all, future_traj_valid_mask_all = _get_future_traj_info(nusc, sample)\n\n            # convert velo from global to lidar\n            for i in range(len(boxes)):\n                velo = np.array([*velocity[i], 0.0])\n                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(\n                    l2e_r_mat).T\n                velocity[i] = velo[:2]\n\n            names = [b.name for b in boxes]\n            for i in range(len(names)):\n                if names[i] in NuScenesDataset.NameMapping:\n                    names[i] = NuScenesDataset.NameMapping[names[i]]\n            names = np.array(names)\n\n            # update valid now\n            name_in_track = [_a in nus_categories for _a in names]\n            name_in_track = np.array(name_in_track)\n            valid_flag = np.logical_and(valid_flag, name_in_track)\n            \n            # we need to convert box size to\n            # the format of our lidar coordinate system\n            # which is x_size, y_size, z_size (corresponding to l, w, h)\n            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)\n            assert len(gt_boxes) == len(\n                annotations), f'{len(gt_boxes)}, {len(annotations)}'\n\n            # get future coords for each box\n            # [num_box, fut_ts*2]\n            fut_ts=12\n            num_box = len(boxes)\n            gt_fut_trajs = np.zeros((num_box, fut_ts, 2))\n            gt_fut_yaw = np.zeros((num_box, fut_ts))\n            gt_fut_masks = np.zeros((num_box, fut_ts))\n            gt_boxes_yaw = -(gt_boxes[:,6] + np.pi / 2)\n            # agent lcf feat (x, y, yaw, vx, vy, width, length, height, type)\n            agent_lcf_feat = np.zeros((num_box, 9))\n            gt_fut_goal = np.zeros((num_box))\n            for i, anno in enumerate(annotations):\n                cur_box = boxes[i]\n                cur_anno = anno\n           
     agent_lcf_feat[i, 0:2] = cur_box.center[:2]\t\n                agent_lcf_feat[i, 2] = gt_boxes_yaw[i]\n                agent_lcf_feat[i, 3:5] = velocity[i]\n                agent_lcf_feat[i, 5:8] = anno['size'] # width,length,height\n                agent_lcf_feat[i, 8] = cat2idx[names[i]] if names[i] in cat2idx.keys() else -1\n                for j in range(fut_ts):\n                    if cur_anno['next'] != '':\n                        anno_next = nusc.get('sample_annotation', cur_anno['next'])\n                        box_next = Box(\n                            anno_next['translation'], anno_next['size'], Quaternion(anno_next['rotation'])\n                        )\n                        # Move box to ego vehicle coord system.\n                        # box_next.translate(-np.array(pose_record['translation']))\n                        # box_next.rotate(Quaternion(pose_record['rotation']).inverse)\n                        #  Move box to sensor coord system.\n                        # box_next.translate(-np.array(cs_record['translation']))\n                        # box_next.rotate(Quaternion(cs_record['rotation']).inverse)\n                        gt_fut_trajs[i, j] = box_next.center[:2]  # - cur_box.center[:2]\n                        gt_fut_masks[i, j] = 1\n                        # add yaw diff\n                        _, _, box_yaw = quart_to_rpy([cur_box.orientation.x, cur_box.orientation.y,\n                                                      cur_box.orientation.z, cur_box.orientation.w])\n                        _, _, box_yaw_next = quart_to_rpy([box_next.orientation.x, box_next.orientation.y,\n                                                           box_next.orientation.z, box_next.orientation.w])\n                        gt_fut_yaw[i, j] = box_yaw_next - box_yaw\n                        cur_anno = anno_next\n                        cur_box = box_next\n                    else:\n                        gt_fut_trajs[i, j:] = 0\n                        break\n                # get agent goal\n                gt_fut_coords = np.cumsum(gt_fut_trajs[i], axis=-2)\n                coord_diff = gt_fut_coords[-1] - gt_fut_coords[0]\n                if coord_diff.max() < 1.0: # static\n                    gt_fut_goal[i] = 9\n                else:\n                    box_mot_yaw = np.arctan2(coord_diff[1], coord_diff[0]) + np.pi\n                    gt_fut_goal[i] = box_mot_yaw // (np.pi / 4)  # 0-8: goal direction class\n\n\n            # get ego history traj (offset format)\n            ego_his_trajs = np.zeros((his_ts+1, 3))\n            ego_his_trajs_diff = np.zeros((his_ts+1, 3))\n            sample_cur = sample\n            for i in range(his_ts, -1, -1):\n                if sample_cur is not None:\n                    pose_mat = get_global_sensor_pose(sample_cur, nusc, inverse=False)\n                    ego_his_trajs[i] = pose_mat[:3, 3]\n                    has_prev = sample_cur['prev'] != ''\n                    has_next = sample_cur['next'] != ''\n                    if has_next:\n                        sample_next = nusc.get('sample', sample_cur['next'])\n                        pose_mat_next = get_global_sensor_pose(sample_next, nusc, inverse=False)\n                        ego_his_trajs_diff[i] = pose_mat_next[:3, 3] - ego_his_trajs[i]\n                    sample_cur = nusc.get('sample', sample_cur['prev']) if has_prev else None\n                else:\n                    ego_his_trajs[i] = ego_his_trajs[i+1] - ego_his_trajs_diff[i+1]\n                    
ego_his_trajs_diff[i] = ego_his_trajs_diff[i+1]\n            \n            # global to ego at lcf\n            ego_his_trajs = ego_his_trajs - np.array(pose_record['translation'])\n            rot_mat = Quaternion(pose_record['rotation']).inverse.rotation_matrix\n            ego_his_trajs = np.dot(rot_mat, ego_his_trajs.T).T\n            # ego to lidar at lcf\n            # ego_his_trajs = ego_his_trajs - np.array(cs_record['translation'])\n            # rot_mat = Quaternion(cs_record['rotation']).inverse.rotation_matrix\n            # ego_his_trajs = np.dot(rot_mat, ego_his_trajs.T).T\n            ego_his_trajs = ego_his_trajs[1:] - ego_his_trajs[:-1]\n\n            # get ego future traj (offset format)\n            ego_fut_trajs = np.zeros((fut_ts+1, 3))\n            ego_fut_masks = np.zeros((fut_ts+1))\n            sample_cur = sample\n            for i in range(fut_ts+1):\n                pose_mat = get_global_sensor_pose(sample_cur, nusc, inverse=False)\n                ego_fut_trajs[i] = pose_mat[:3, 3]\n                ego_fut_masks[i] = 1\n                if sample_cur['next'] == '':\n                    ego_fut_trajs[i+1:] = ego_fut_trajs[i]\n                    break\n                else:\n                    sample_cur = nusc.get('sample', sample_cur['next'])\n            # global to ego at lcf\n            ego_fut_trajs = ego_fut_trajs - np.array(pose_record['translation'])\n            rot_mat = Quaternion(pose_record['rotation']).inverse.rotation_matrix\n            ego_fut_trajs = np.dot(rot_mat, ego_fut_trajs.T).T\n            # ego to lidar at lcf\n            # ego_fut_trajs = ego_fut_trajs - np.array(cs_record['translation'])\n            # rot_mat = Quaternion(cs_record['rotation']).inverse.rotation_matrix\n            # ego_fut_trajs = np.dot(rot_mat, ego_fut_trajs.T).T\n\n            # drive command according to final fut step offset from lcf\n            if ego_fut_trajs[-1][1] >= 2:\n                command = np.array([1, 0, 0])  # Turn Right\n            elif ego_fut_trajs[-1][1] <= -2:\n                command = np.array([0, 1, 0])  # Turn Left\n            else:\n                command = np.array([0, 0, 1])  # Go Straight\n            # offset from lcf -> per-step offset\n            ego_fut_trajs = ego_fut_trajs[1:] - ego_fut_trajs[:-1]\n\n            ### ego lcf feat (vx, vy, ax, ay, w, length, width, vel, steer), w: yaw rate\n            ego_lcf_feat = np.zeros(9)\n            # estimate ego velocity and acceleration from odometry\n            _, _, ego_yaw = quart_to_rpy(pose_record['rotation'])\n            ego_pos = np.array(pose_record['translation'])\n            if pose_record_prev is not None:\n                _, _, ego_yaw_prev = quart_to_rpy(pose_record_prev['rotation'])\n                ego_pos_prev = np.array(pose_record_prev['translation'])\n            if pose_record_next is not None:\n                _, _, ego_yaw_next = quart_to_rpy(pose_record_next['rotation'])\n                ego_pos_next = np.array(pose_record_next['translation'])\n            assert (pose_record_prev is not None) or (pose_record_next is not None), 'prev token and next token are both empty'\n            if pose_record_prev is not None:\n                ego_w = (ego_yaw - ego_yaw_prev) / 0.5\n                ego_v = np.linalg.norm(ego_pos[:2] - ego_pos_prev[:2]) / 0.5\n                ego_vy, ego_vx = ego_v * math.cos(ego_yaw + np.pi/2), ego_v * math.sin(ego_yaw + np.pi/2)\n            else:\n                ego_w = (ego_yaw_next - ego_yaw) / 0.5\n                ego_v = np.linalg.norm(ego_pos_next[:2] - 
ego_pos[:2]) / 0.5\n                ego_vy, ego_vx = ego_v * math.cos(ego_yaw + np.pi/2), ego_v * math.sin(ego_yaw + np.pi/2)\n\n            ref_scene = nusc.get(\"scene\", sample['scene_token'])\n            try:\n                pose_msgs = nusc_can_bus.get_messages(ref_scene['name'],'pose')\n                steer_msgs = nusc_can_bus.get_messages(ref_scene['name'], 'steeranglefeedback')\n                pose_uts = [msg['utime'] for msg in pose_msgs]\n                steer_uts = [msg['utime'] for msg in steer_msgs]\n                ref_utime = sample['timestamp']\n                pose_index = locate_message(pose_uts, ref_utime)\n                pose_data = pose_msgs[pose_index]\n                steer_index = locate_message(steer_uts, ref_utime)\n                steer_data = steer_msgs[steer_index]\n                # initial speed\n                v0 = pose_data[\"vel\"][0]  # [0] means longitudinal velocity  m/s\n                # curvature (positive: turn left)\n                steering = steer_data[\"value\"]\n                # flip x axis if in left-hand traffic (singapore)\n                map_location = nusc.get('log', nusc.get('scene', sample['scene_token'])['log_token'])['location']\n                flip_flag = True if map_location.startswith('singapore') else False\n                if flip_flag:\n                    steering *= -1\n                Kappa = 2 * steering / 2.588\n            except:\n                delta_x = ego_his_trajs[-1, 0] + ego_fut_trajs[0, 0]\n                delta_y = ego_his_trajs[-1, 1] + ego_fut_trajs[0, 1]\n                v0 = np.sqrt(delta_x**2 + delta_y**2)\n                Kappa = 0\n\n            ego_lcf_feat[:2] = np.array([ego_vx, ego_vy]) # can_bus[13:15]\n            ego_lcf_feat[2:4] = can_bus[7:9]\n            ego_lcf_feat[4] = ego_w # can_bus[12]\n            ego_lcf_feat[5:7] = np.array([ego_length, ego_width])\n            ego_lcf_feat[7] = v0\n            ego_lcf_feat[8] = Kappa\n\n            info['gt_boxes'] = gt_boxes\n            info['gt_names'] = names\n\n            info['gt_velocity'] = velocity.reshape(-1, 2)\n            info['num_lidar_pts'] = np.array(\n                [a['num_lidar_pts'] for a in annotations])\n            info['num_radar_pts'] = np.array(\n                [a['num_radar_pts'] for a in annotations])\n            info['valid_flag'] = valid_flag\n            info['gt_agent_fut_trajs'] = gt_fut_trajs.reshape(-1, fut_ts*2).astype(np.float32)\n            info['gt_agent_fut_masks'] = gt_fut_masks.reshape(-1, fut_ts).astype(np.float32)\n            info['gt_agent_lcf_feat'] = agent_lcf_feat.astype(np.float32)\n            info['gt_agent_fut_yaw'] = gt_fut_yaw.astype(np.float32)\n            info['gt_agent_fut_goal'] = gt_fut_goal.astype(np.float32)\n            info['gt_ego_his_trajs'] = ego_his_trajs[:, :2].astype(np.float32)\n            info['gt_ego_fut_trajs'] = ego_fut_trajs[:, :2].astype(np.float32)\n            info['gt_ego_fut_masks'] = ego_fut_masks[1:].astype(np.float32)\n            info['gt_ego_fut_cmd'] = command.astype(np.float32)\n            info['gt_ego_lcf_feat'] = ego_lcf_feat.astype(np.float32)\n\n            info['instance_inds'] = instance_inds\n            info['gt_ins_tokens'] = np.array(instance_tokens)\n            info['fut_traj'] = future_traj_all\n            info['fut_traj_valid_mask'] = future_traj_valid_mask_all\n            # add visibility_tokens\n            visibility_tokens = [int(anno['visibility_token'])\n                                 for anno in annotations]\n            
info['visibility_tokens'] = np.array(visibility_tokens) \n\n            # if forecasting:\n            #     fboxes, fannotations, fmasks, ftypes = get_forecasting_annotations(nusc, annotations, forecasting_length)\n            #     locs = [np.array([b.center for b in boxes]).reshape(-1, 3) for boxes in fboxes]\n            #     tokens = [np.array([b.token for b in boxes]) for boxes in fboxes]\n            #     info['forecasting_locs'] = np.array(locs)\n            #     info['forecasting_tokens'] = np.array(tokens)\n            #     info['forecasting_masks'] = np.array(fmasks)\n            #     info['forecasting_types'] = np.array(ftypes)\n\n\n            gt_2dbboxes_cams = []\n            gt_3dbboxes_cams = []\n            centers2d_cams = []\n            gt_2dbboxes_ignore_cams = []\n            gt_2dlabels_cams = []\n            depths_cams = []\n            visibilities = []\n\n\n            for cam_type, cam_info in info['cams'].items():\n                gt_3dbboxes = []\n                gt_2dbboxes = []\n                centers2d = []\n                gt_2dbboxes_ignore = []\n                gt_2dlabels = []\n                depths = []\n                visibility = []\n                (height, width, _) = mmcv.imread(cam_info['data_path']).shape\n                annos_cam = get_2d_boxes(nusc, cam_info['sample_data_token'], visibilities= ['', '1', '2', '3', '4'], mono3d=True)\n                for i, ann in enumerate(annos_cam):\n                    if ann is None:\n                        continue\n                    if ann.get('ignore', False):\n                        continue\n                    x1, y1, w, h = ann['bbox']\n                    inter_w = max(0, min(x1 + w, width) - max(x1, 0))\n                    inter_h = max(0, min(y1 + h, height) - max(y1, 0))\n                    if inter_w * inter_h == 0:\n                        continue\n                    if ann['area'] <= 0 or w < 1 or h < 1:\n                        continue\n                    if ann['category_name'] not in nus_categories:\n                        continue\n                    bbox = [x1, y1, x1 + w, y1 + h]\n                    if ann.get('iscrowd', False):\n                        gt_2dbboxes_ignore.append(bbox)\n                    else:\n                        gt_2dbboxes.append(bbox)\n                        gt_2dlabels.append(ann['category_id'])\n                        center2d = ann['center2d'][:2]\n                        depth = ann['center2d'][2]\n                        centers2d.append(center2d)\n                        depths.append(depth)\n                        visibility.append(ann['visibility_token'])\n                        gt_3dbboxes.append(ann['bbox_cam3d'])\n                gt_2dbboxes = np.array(gt_2dbboxes, dtype=np.float32)\n                gt_3dbboxes_cam = np.array(gt_3dbboxes, dtype=np.float32)\n                gt_2dlabels = np.array(gt_2dlabels, dtype=np.int64)\n                centers2d = np.array(centers2d, dtype=np.float32)\n                depths = np.array(depths, dtype=np.float32)\n                gt_2dbboxes_ignore = np.array(gt_2dbboxes_ignore, dtype=np.float32)\n                gt_2dbboxes_cams.append(gt_2dbboxes)\n                gt_2dlabels_cams.append(gt_2dlabels)\n                centers2d_cams.append(centers2d)\n                gt_3dbboxes_cams.append(gt_3dbboxes_cam)\n                depths_cams.append(depths)\n                gt_2dbboxes_ignore_cams.append(gt_2dbboxes_ignore)\n                visibilities.append(visibility)\n                
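# attach the per-camera 2D box, mono-3D box, center/depth and visibility lists to this sample's info\n                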
info.update( \n                dict(\n                    bboxes2d=gt_2dbboxes_cams,\n                    bboxes3d_cams=gt_3dbboxes_cams,\n                    labels2d=gt_2dlabels_cams,\n                    centers2d=centers2d_cams,\n                    depths=depths_cams,\n                    bboxes_ignore=gt_2dbboxes_ignore_cams,\n                    visibilities = visibilities,)\n            )\n        \n        if sample['scene_token'] in train_scenes:\n            train_nusc_infos.append(info)\n        else:\n            val_nusc_infos.append(info)\n\n    return train_nusc_infos, val_nusc_infos\n\ndef get_global_sensor_pose(rec, nusc, inverse=False):\n    lidar_sample_data = nusc.get('sample_data', rec['data']['LIDAR_TOP'])\n\n    sd_ep = nusc.get(\"ego_pose\", lidar_sample_data[\"ego_pose_token\"])\n    sd_cs = nusc.get(\"calibrated_sensor\", lidar_sample_data[\"calibrated_sensor_token\"])\n    if inverse is False:\n        global_from_ego = transform_matrix(sd_ep[\"translation\"], Quaternion(sd_ep[\"rotation\"]), inverse=False)\n        ego_from_sensor = transform_matrix(sd_cs[\"translation\"], Quaternion(sd_cs[\"rotation\"]), inverse=False)\n        pose = global_from_ego.dot(ego_from_sensor)\n        # translation equivalent writing\n        # pose_translation = np.array(sd_cs[\"translation\"])\n        # rot_mat = Quaternion(sd_ep['rotation']).rotation_matrix\n        # pose_translation = np.dot(rot_mat, pose_translation)\n        # # pose_translation = pose[:3, 3]\n        # pose_translation = pose_translation + np.array(sd_ep[\"translation\"])\n    else:\n        sensor_from_ego = transform_matrix(sd_cs[\"translation\"], Quaternion(sd_cs[\"rotation\"]), inverse=True)\n        ego_from_global = transform_matrix(sd_ep[\"translation\"], Quaternion(sd_ep[\"rotation\"]), inverse=True)\n        pose = sensor_from_ego.dot(ego_from_global)\n    return pose\n\ndef obtain_sensor2top(nusc,\n                      sensor_token,\n                      l2e_t,\n                      l2e_r_mat,\n                      e2g_t,\n                      e2g_r_mat,\n                      sensor_type='lidar'):\n    \"\"\"Obtain the info with RT matric from general sensor to Top LiDAR.\n\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n        sensor_token (str): Sample data token corresponding to the\n            specific sensor type.\n        l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).\n        l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego\n            in shape (3, 3).\n        e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).\n        e2g_r_mat (np.ndarray): Rotation matrix from ego to global\n            in shape (3, 3).\n        sensor_type (str, optional): Sensor to calibrate. 
Default: 'lidar'.\n\n    Returns:\n        sweep (dict): Sweep information after transformation.\n    \"\"\"\n    sd_rec = nusc.get('sample_data', sensor_token)\n    cs_record = nusc.get('calibrated_sensor',\n                         sd_rec['calibrated_sensor_token'])\n    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n    data_path = str(nusc.get_sample_data_path(sd_rec['token']))\n    if os.getcwd() in data_path:  # path from lyftdataset is absolute path\n        data_path = data_path.split(f'{os.getcwd()}/')[-1]  # relative path\n    sweep = {\n        'data_path': data_path,\n        'type': sensor_type,\n        'sample_data_token': sd_rec['token'],\n        'sensor2ego_translation': cs_record['translation'],\n        'sensor2ego_rotation': cs_record['rotation'],\n        'ego2global_translation': pose_record['translation'],\n        'ego2global_rotation': pose_record['rotation'],\n        'timestamp': sd_rec['timestamp']\n    }\n    l2e_r_s = sweep['sensor2ego_rotation']\n    l2e_t_s = sweep['sensor2ego_translation']\n    e2g_r_s = sweep['ego2global_rotation']\n    e2g_t_s = sweep['ego2global_translation']\n\n    # obtain the RT from sensor to Top LiDAR\n    # sweep->ego->global->ego'->lidar\n    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix\n    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix\n    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (\n        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)\n    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (\n        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)\n    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T\n                  ) + l2e_t @ np.linalg.inv(l2e_r_mat).T\n    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T\n    sweep['sensor2lidar_translation'] = T\n    return sweep\n\n\ndef export_2d_annotation(root_path, info_path, version, mono3d=True):\n    \"\"\"Export 2d annotation from the info file and raw data.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        info_path (str): Path of the info file.\n        version (str): Dataset version.\n        mono3d (bool, optional): Whether to export mono3d annotation.\n            Default: True.\n    \"\"\"\n    # get bbox annotations for camera\n    camera_types = [\n        'CAM_FRONT',\n        'CAM_FRONT_RIGHT',\n        'CAM_FRONT_LEFT',\n        'CAM_BACK',\n        'CAM_BACK_LEFT',\n        'CAM_BACK_RIGHT',\n    ]\n    nusc_infos = mmcv.load(info_path)['infos']\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    # info_2d_list = []\n    cat2Ids = [\n        dict(id=nus_categories.index(cat_name), name=cat_name)\n        for cat_name in nus_categories\n    ]\n    coco_ann_id = 0\n    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)\n    for info in mmcv.track_iter_progress(nusc_infos):\n        for cam in camera_types:\n            cam_info = info['cams'][cam]\n            coco_infos = get_2d_boxes(\n                nusc,\n                cam_info['sample_data_token'],\n                visibilities=['', '1', '2', '3', '4'],\n                mono3d=mono3d)\n            (height, width, _) = mmcv.imread(cam_info['data_path']).shape\n            coco_2d_dict['images'].append(\n                dict(\n                    file_name=cam_info['data_path'].split('data/nuscenes/')\n                    [-1],\n                    id=cam_info['sample_data_token'],\n                    token=info['token'],\n                    
cam2ego_rotation=cam_info['sensor2ego_rotation'],\n                    cam2ego_translation=cam_info['sensor2ego_translation'],\n                    ego2global_rotation=info['ego2global_rotation'],\n                    ego2global_translation=info['ego2global_translation'],\n                    cam_intrinsic=cam_info['cam_intrinsic'],\n                    width=width,\n                    height=height))\n            for coco_info in coco_infos:\n                if coco_info is None:\n                    continue\n                # add an empty key for coco format\n                coco_info['segmentation'] = []\n                coco_info['id'] = coco_ann_id\n                coco_2d_dict['annotations'].append(coco_info)\n                coco_ann_id += 1\n    if mono3d:\n        json_prefix = f'{info_path[:-4]}_mono3d'\n    else:\n        json_prefix = f'{info_path[:-4]}'\n    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')\n\n\ndef get_2d_boxes(nusc,\n                 sample_data_token: str,\n                 visibilities: List[str],\n                 mono3d=True):\n    \"\"\"Get the 2D annotation records for a given `sample_data_token`.\n\n    Args:\n        sample_data_token (str): Sample data token belonging to a camera\n            keyframe.\n        visibilities (list[str]): Visibility filter.\n        mono3d (bool): Whether to get boxes with mono3d annotation.\n\n    Return:\n        list[dict]: List of 2D annotation record that belongs to the input\n            `sample_data_token`.\n    \"\"\"\n\n    # Get the sample data and the sample corresponding to that sample data.\n    sd_rec = nusc.get('sample_data', sample_data_token)\n\n    assert sd_rec[\n        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \\\n        ' for camera sample_data!'\n    if not sd_rec['is_key_frame']:\n        raise ValueError(\n            'The 2D re-projections are available only for keyframes.')\n\n    s_rec = nusc.get('sample', sd_rec['sample_token'])\n\n    # Get the calibrated sensor and ego pose\n    # record to get the transformation matrices.\n    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])\n    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])\n\n    # Get all the annotation with the specified visibilties.\n    ann_recs = [\n        nusc.get('sample_annotation', token) for token in s_rec['anns']\n    ]\n    ann_recs = [\n        ann_rec for ann_rec in ann_recs\n        if (ann_rec['visibility_token'] in visibilities)\n    ]\n\n    repro_recs = []\n\n    for ann_rec in ann_recs:\n        # Augment sample_annotation with token information.\n        ann_rec['sample_annotation_token'] = ann_rec['token']\n        ann_rec['sample_data_token'] = sample_data_token\n\n        # Get the box in global coordinates.\n        box = nusc.get_box(ann_rec['token'])\n\n        # Move them to the ego-pose frame.\n        box.translate(-np.array(pose_rec['translation']))\n        box.rotate(Quaternion(pose_rec['rotation']).inverse)\n\n        # Move them to the calibrated sensor frame.\n        box.translate(-np.array(cs_rec['translation']))\n        box.rotate(Quaternion(cs_rec['rotation']).inverse)\n\n        # Filter out the corners that are not in front of the calibrated\n        # sensor.\n        corners_3d = box.corners()\n        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()\n        corners_3d = corners_3d[:, in_front]\n\n        # Project 3d box to 2d.\n        corner_coords = 
view_points(corners_3d, camera_intrinsic,\n                                    True).T[:, :2].tolist()\n\n        # Keep only corners that fall within the image.\n        final_coords = post_process_coords(corner_coords)\n\n        # Skip if the convex hull of the re-projected corners\n        # does not intersect the image canvas.\n        if final_coords is None:\n            continue\n        else:\n            min_x, min_y, max_x, max_y = final_coords\n\n        # Generate dictionary record to be included in the .json file.\n        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,\n                                    sample_data_token, sd_rec['filename'])\n        if repro_rec is None:\n            continue\n        # If mono3d=True, add 3D annotations in camera coordinates\n        if mono3d and (repro_rec is not None):\n            loc = box.center.tolist()\n\n            dim = box.wlh\n            dim[[0, 1, 2]] = dim[[1, 2, 0]]  # convert wlh to our lhw\n            dim = dim.tolist()\n\n            rot = box.orientation.yaw_pitch_roll[0]\n            rot = [-rot]  # convert the rot to our cam coordinate\n\n            global_velo2d = nusc.box_velocity(box.token)[:2]\n            global_velo3d = np.array([*global_velo2d, 0.0])\n            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix\n            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix\n            cam_velo3d = global_velo3d @ np.linalg.inv(\n                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T\n            velo = cam_velo3d[0::2].tolist()\n\n            repro_rec['bbox_cam3d'] = loc + dim + rot\n            repro_rec['velo_cam3d'] = velo\n\n            center3d = np.array(loc).reshape([1, 3])\n            center2d = points_cam2img(\n                center3d, camera_intrinsic, with_depth=True)\n            repro_rec['center2d'] = center2d.squeeze().tolist()\n            # normalized center2D + depth\n            # if samples with depth < 0 will be removed\n            if repro_rec['center2d'][2] <= 0:\n                continue\n\n            ann_token = nusc.get('sample_annotation',\n                                 box.token)['attribute_tokens']\n            if len(ann_token) == 0:\n                attr_name = 'None'\n            else:\n                attr_name = nusc.get('attribute', ann_token[0])['name']\n            attr_id = nus_attributes.index(attr_name)\n            repro_rec['attribute_name'] = attr_name\n            repro_rec['attribute_id'] = attr_id\n\n        repro_recs.append(repro_rec)\n\n    return repro_recs\n\n\ndef post_process_coords(\n    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)\n) -> Union[Tuple[float, float, float, float], None]:\n    \"\"\"Get the intersection of the convex hull of the reprojected bbox corners\n    and the image canvas, return None if no intersection.\n\n    Args:\n        corner_coords (list[int]): Corner coordinates of reprojected\n            bounding box.\n        imsize (tuple[int]): Size of the image canvas.\n\n    Return:\n        tuple [float]: Intersection of the convex hull of the 2D box\n            corners and the image canvas.\n    \"\"\"\n    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull\n    img_canvas = box(0, 0, imsize[0], imsize[1])\n\n    if polygon_from_2d_box.intersects(img_canvas):\n        img_intersection = polygon_from_2d_box.intersection(img_canvas)\n        intersection_coords = np.array(\n            [coord for coord in img_intersection.exterior.coords])\n\n        min_x = 
min(intersection_coords[:, 0])\n        min_y = min(intersection_coords[:, 1])\n        max_x = max(intersection_coords[:, 0])\n        max_y = max(intersection_coords[:, 1])\n\n        return min_x, min_y, max_x, max_y\n    else:\n        return None\n\n\ndef generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,\n                    sample_data_token: str, filename: str) -> OrderedDict:\n    \"\"\"Generate one 2D annotation record given various information on top of\n    the 2D bounding box coordinates.\n\n    Args:\n        ann_rec (dict): Original 3d annotation record.\n        x1 (float): Minimum value of the x coordinate.\n        y1 (float): Minimum value of the y coordinate.\n        x2 (float): Maximum value of the x coordinate.\n        y2 (float): Maximum value of the y coordinate.\n        sample_data_token (str): Sample data token.\n        filename (str):The corresponding image file where the annotation\n            is present.\n\n    Returns:\n        dict: A sample 2D annotation record.\n            - file_name (str): file name\n            - image_id (str): sample data token\n            - area (float): 2d box area\n            - category_name (str): category name\n            - category_id (int): category id\n            - bbox (list[float]): left x, top y, dx, dy of 2d box\n            - iscrowd (int): whether the area is crowd\n    \"\"\"\n    repro_rec = OrderedDict()\n    repro_rec['sample_data_token'] = sample_data_token\n    coco_rec = dict()\n\n    relevant_keys = [\n        'attribute_tokens',\n        'category_name',\n        'instance_token',\n        'next',\n        'num_lidar_pts',\n        'num_radar_pts',\n        'prev',\n        'sample_annotation_token',\n        'sample_data_token',\n        'visibility_token',\n    ]\n\n    for key, value in ann_rec.items():\n        if key in relevant_keys:\n            repro_rec[key] = value\n\n    repro_rec['bbox_corners'] = [x1, y1, x2, y2]\n    repro_rec['filename'] = filename\n\n    coco_rec['file_name'] = filename\n    coco_rec['image_id'] = sample_data_token\n    coco_rec['area'] = (y2 - y1) * (x2 - x1)\n\n    if repro_rec['category_name'] not in NuScenesDataset.NameMapping:\n        return None\n    cat_name = NuScenesDataset.NameMapping[repro_rec['category_name']]\n    #filter useless categories for tracking\n    if cat_name not in nus_categories:\n        return None\n    coco_rec['category_name'] = cat_name\n    coco_rec['category_id'] = nus_categories.index(cat_name)\n    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    coco_rec['iscrowd'] = 0\n    coco_rec['visibility_token'] = repro_rec['visibility_token']\n\n\n    return coco_rec\n\n\nif __name__ == '__main__':\n    export_2d_annotation('/mount/data/lsbevv2/data/nuscenes', '/mount/data/lsbevv2/data/nuscenes/bevdetv2-nuscenes_infos_val.pkl', 'v1.0-trainval', False)"
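A minimal, self-contained sketch of the clipping step in post_process_coords above: the convex hull of the re-projected box corners is intersected with the 1600x900 image canvas and reduced to an axis-aligned extent. The helper name clip_corners_to_canvas and the corner values are invented for illustration; only shapely and numpy are required.

import numpy as np
from shapely.geometry import MultiPoint, box

def clip_corners_to_canvas(corner_coords, imsize=(1600, 900)):
    # convex hull of the projected 3D-box corners, in pixel coordinates
    hull = MultiPoint(corner_coords).convex_hull
    canvas = box(0, 0, imsize[0], imsize[1])
    if not hull.intersects(canvas):
        return None  # the box lies completely outside the image
    inter = np.array(hull.intersection(canvas).exterior.coords)
    return inter[:, 0].min(), inter[:, 1].min(), inter[:, 0].max(), inter[:, 1].max()

# a rectangle that sticks out of the left image border gets clipped at x = 0
corners = [(-100.0, 100.0), (500.0, 100.0), (500.0, 700.0), (-100.0, 700.0)]
print(clip_corners_to_canvas(corners))  # i.e. (0.0, 100.0, 500.0, 700.0)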
  },
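The sensor-to-Top-LiDAR rotation and translation assembled in obtain_sensor2top can be sanity-checked by composing the same chain (sweep sensor -> sweep ego -> global -> keyframe ego -> Top LiDAR) with 4x4 homogeneous matrices. The sketch below uses random poses and an invented helper hom; it only confirms that the row-vector formulas above agree with the straightforward matrix chain.

import numpy as np
from pyquaternion import Quaternion

def hom(rot, trans):
    # 4x4 homogeneous transform from a 3x3 rotation matrix and a translation vector
    m = np.eye(4)
    m[:3, :3] = rot
    m[:3, 3] = trans
    return m

rng = np.random.default_rng(0)
def rand_rot():
    return Quaternion(axis=rng.normal(size=3), angle=rng.uniform(0, np.pi)).rotation_matrix
def rand_t():
    return rng.normal(size=3)

# sweep sensor->ego and ego->global (suffix _s), keyframe lidar->ego and ego->global
l2e_r_s_mat, l2e_t_s = rand_rot(), rand_t()
e2g_r_s_mat, e2g_t_s = rand_rot(), rand_t()
l2e_r_mat, l2e_t = rand_rot(), rand_t()
e2g_r_mat, e2g_t = rand_rot(), rand_t()

# reference chain: sweep sensor -> sweep ego -> global -> keyframe ego -> Top LiDAR
sensor2lidar = (np.linalg.inv(hom(l2e_r_mat, l2e_t)) @ np.linalg.inv(hom(e2g_r_mat, e2g_t))
                @ hom(e2g_r_s_mat, e2g_t_s) @ hom(l2e_r_s_mat, l2e_t_s))

# the row-vector form computed in obtain_sensor2top (points @ R + T)
R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T

assert np.allclose(sensor2lidar[:3, :3], R.T)  # matches sweep['sensor2lidar_rotation']
assert np.allclose(sensor2lidar[:3, 3], T)     # matches sweep['sensor2lidar_translation']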
  {
    "path": "tools/data_converter/nuscenes_prediction_tools.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2023 toyota research instutute.\n# ------------------------------------------------------------------------\n# Modified from FutureDet (https://github.com/neeharperi/FutureDet)\n# ------------------------------------------------------------------------\nimport numpy as np\nfrom pyquaternion import Quaternion\nfrom nuscenes import NuScenes\nfrom nuscenes.utils.data_classes import Box\nfrom itertools import tee\nfrom copy import deepcopy\n\n\ndef get_forecasting_annotations(nusc: NuScenes, annotations, length):\n    \"\"\"Acquire the trajectories for each box\n    \"\"\"\n    forecast_annotations = []\n    forecast_boxes = []   \n    forecast_trajectory_type = []\n    forecast_visibility_mask = []\n    sample_tokens = [s[\"token\"] for s in nusc.sample]\n\n    for annotation in annotations:\n        tracklet_box = []\n        tracklet_annotation = []\n        tracklet_visiblity_mask = []\n        tracklet_trajectory_type = []\n\n        token = nusc.sample[sample_tokens.index(annotation[\"sample_token\"])][\"data\"][\"LIDAR_TOP\"]\n        sd_record = nusc.get(\"sample_data\", token)\n        cs_record = nusc.get(\"calibrated_sensor\", sd_record[\"calibrated_sensor_token\"])\n        pose_record = nusc.get(\"ego_pose\", sd_record[\"ego_pose_token\"])\n\n        visibility = True\n        for step in range(length):\n            box = Box(center = annotation[\"translation\"],\n                      size = annotation[\"size\"],\n                      orientation = Quaternion(annotation[\"rotation\"]),\n                      velocity = nusc.box_velocity(annotation[\"token\"]),\n                      name = annotation[\"category_name\"],\n                      token = annotation[\"token\"])\n            \n            # move box to the ego-system when the prediction is made\n            box.translate(-np.array(pose_record[\"translation\"]))\n            box.rotate(Quaternion(pose_record[\"rotation\"]).inverse)\n\n            #  Move box to sensor coord system\n            box.translate(-np.array(cs_record[\"translation\"]))\n            box.rotate(Quaternion(cs_record[\"rotation\"]).inverse)\n\n            tracklet_box.append(box)\n            tracklet_annotation.append(annotation)\n            tracklet_visiblity_mask.append(visibility)\n\n            next_token = annotation['next']\n            if next_token != '':\n                annotation = nusc.get('sample_annotation', next_token)\n            else:\n                # if the trajectory cannot be prolonged anymore,\n                # use the last one to pad and set the visibility flag\n                annotation = annotation\n                visibility = False\n    \n        tokens = [b[\"sample_token\"] for b in tracklet_annotation]\n        time = [get_time(nusc, src, dst) for src, dst in window(tokens, 2)]\n        tracklet_trajectory_type = trajectory_type(nusc, tracklet_box, time, length) # same as FutureDet\n\n        forecast_boxes.append(tracklet_box)\n        forecast_annotations.append(tracklet_annotation)\n        forecast_trajectory_type.append(length * [tracklet_trajectory_type])\n        forecast_visibility_mask.append(tracklet_visiblity_mask)\n    return forecast_boxes, forecast_annotations, forecast_visibility_mask, forecast_trajectory_type\n\n\ndef window(iterable, size):\n    iters = tee(iterable, size)\n    for i in range(1, size):\n        for each in iters[i:]:\n            next(each, None)\n\n    return 
zip(*iters)\n\ndef get_time(nusc, src_token, dst_token):\n    time_last = 1e-6 * nusc.get('sample', src_token)[\"timestamp\"]\n    time_first = 1e-6 * nusc.get('sample', dst_token)[\"timestamp\"]\n    time_diff = time_first - time_last\n\n    return time_diff \n\n\ndef center_distance(gt_box, pred_box) -> float:\n    \"\"\"\n    L2 distance between the box centers (xy only).\n    :param gt_box: GT annotation sample.\n    :param pred_box: Predicted sample.\n    :return: L2 distance.\n    \"\"\"\n    return np.linalg.norm(np.array(pred_box.center[:2]) - np.array(gt_box.center[:2]))\n\n\ndef trajectory_type(nusc, boxes, time, timesteps=7, past=False):\n    target = boxes[-1]\n    \n    static_forecast = deepcopy(boxes[0])\n\n    linear_forecast = deepcopy(boxes[0])\n    vel = linear_forecast.velocity[:2]\n    disp = np.sum(list(map(lambda x: np.array(list(vel) + [0]) * x, time)), axis=0)\n\n    if past:\n        linear_forecast.center = linear_forecast.center - disp\n\n    else:\n        linear_forecast.center = linear_forecast.center + disp\n    \n    if center_distance(target, static_forecast) < max(target.wlh[0], target.wlh[1]):\n        # return \"static\"\n        return 0\n\n    elif center_distance(target, linear_forecast) < max(target.wlh[0], target.wlh[1]):\n        # return \"linear\"\n        return 1\n\n    else:\n        # return \"nonlinear\"\n        return 2"
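window above is a tee-based sliding window; with size=2 it yields the consecutive (src, dst) sample-token pairs that get_time turns into per-step time gaps. A standalone illustration (the function is repeated here so the snippet runs on its own, and the token strings are made up):

from itertools import tee

def window(iterable, size):
    # fan the iterable out into `size` iterators and advance the i-th copy by i steps
    iters = tee(iterable, size)
    for i in range(1, size):
        for each in iters[i:]:
            next(each, None)
    return zip(*iters)

print(list(window(['t0', 't1', 't2', 't3'], 2)))
# [('t0', 't1'), ('t1', 't2'), ('t2', 't3')]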
  },
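trajectory_type boils down to two center-distance tests against a threshold of max(width, length) of the final box: a tracklet is static (0) if its end stays near the start position, linear (1) if it stays near a constant-velocity extrapolation, and nonlinear (2) otherwise. A simplified numeric sketch of that rule, with invented positions, velocity and box size:

import numpy as np

def classify(start_xy, start_vel_xy, end_xy, end_wl, dts):
    # 0 = static, 1 = linear (constant velocity), 2 = nonlinear -- same rule as trajectory_type
    thresh = max(end_wl)
    static_forecast = np.asarray(start_xy, dtype=float)
    linear_forecast = static_forecast + np.asarray(start_vel_xy, dtype=float) * np.sum(dts)
    end_xy = np.asarray(end_xy, dtype=float)
    if np.linalg.norm(end_xy - static_forecast) < thresh:
        return 0
    if np.linalg.norm(end_xy - linear_forecast) < thresh:
        return 1
    return 2

dts = [0.5] * 12                                               # twelve 0.5 s gaps, a 6 s horizon
print(classify((0, 0), (0, 0), (0.3, 0.1), (2.0, 4.5), dts))   # 0: barely moved
print(classify((0, 0), (5, 0), (29.5, 0.5), (2.0, 4.5), dts))  # 1: close to 5 m/s * 6 s = 30 m
print(classify((0, 0), (5, 0), (15.0, 12.0), (2.0, 4.5), dts)) # 2: far from the straight-line forecast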
  {
    "path": "tools/data_converter/nuscenes_track_converter.py",
    "content": "# ------------------------------------------------------------------------\n# Copyright (c) 2023 toyota research instutute.\n# ------------------------------------------------------------------------\n# Modified from MUTR3D (https://github.com/a1600012888/MUTR3D)\n# Copyright (c) 2022 Tianyuan Zhang\n# ------------------------------------------------------------------------\n# Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d)\n# Copyright (c) OpenMMLab. All rights reserved.\n# ------------------------------------------------------------------------\nimport mmcv\nimport numpy as np\nimport os\nfrom collections import OrderedDict\nfrom nuscenes.nuscenes import NuScenes\nfrom nuscenes.utils.geometry_utils import view_points\nfrom os import path as osp\nfrom pyquaternion import Quaternion\nfrom shapely.geometry import MultiPoint, box\nfrom typing import List, Tuple, Union\nfrom mmdet3d.core.bbox.box_np_ops import points_cam2img\nfrom projects.tracking_plugin.datasets.nuscenes_tracking_dataset import NuScenesTrackingDataset as NuScenesDataset\nfrom data_converter.nuscenes_prediction_tools import get_forecasting_annotations\n\n\n#  remove the classes barrier, trafficcone and construction_vehicle\nnus_categories = (\n    'car', 'truck', 'bus', 'trailer', \n    'motorcycle', 'bicycle', 'pedestrian', \n    'construction_vehicle', 'traffic_cone', 'barrier')\n\nnus_attributes = ('cycle.with_rider', 'cycle.without_rider',\n                  'pedestrian.moving', 'pedestrian.standing',\n                  'pedestrian.sitting_lying_down', 'vehicle.moving',\n                  'vehicle.parked', 'vehicle.stopped', 'None')\n\n\ndef create_nuscenes_infos(root_path,\n                          out_dir,\n                          info_prefix,\n                          version='v1.0-trainval',\n                          max_sweeps=10,\n                          forecasting=False,\n                          forecasting_length=13):\n    \"\"\"Create info file of nuscene dataset.\n    Given the raw data, generate its related info file in pkl format.\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        version (str): Version of the data.\n            Default: 'v1.0-trainval'\n        max_sweeps (int): Max number of sweeps.\n            Default: 10\n        forecasting (bool): If prepare for forecasting data\n        forecasting_length (int): Max frame number for forecasting.\n            Default: 13 (6 seconds + current frame)\n    \"\"\"\n    from nuscenes.nuscenes import NuScenes\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    from nuscenes.utils import splits\n    available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']\n    assert version in available_vers\n    if version == 'v1.0-trainval':\n        train_scenes = splits.train\n        val_scenes = splits.val\n    elif version == 'v1.0-test':\n        train_scenes = splits.test\n        val_scenes = []\n    elif version == 'v1.0-mini':\n        train_scenes = splits.mini_train\n        val_scenes = splits.mini_val\n        info_prefix = info_prefix + '-mini'\n    else:\n        raise ValueError('unknown')\n\n    # filter existing scenes.\n    available_scenes = get_available_scenes(nusc)\n    available_scene_names = [s['name'] for s in available_scenes]\n    train_scenes = list(\n        filter(lambda x: x in available_scene_names, train_scenes))\n    val_scenes = list(filter(lambda x: x in 
available_scene_names, val_scenes))\n    train_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in train_scenes\n    ])\n    val_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in val_scenes\n    ])\n\n    test = 'test' in version\n    if test:\n        print('test scene: {}'.format(len(train_scenes)))\n    else:\n        print('train scene: {}, val scene: {}'.format(\n            len(train_scenes), len(val_scenes)))\n    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(\n        nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps,\n        forecasting=forecasting, forecasting_length=forecasting_length)\n\n    metadata = dict(version=version)\n    if test:\n        print('test sample: {}'.format(len(train_nusc_infos)))\n        data = dict(infos=train_nusc_infos, metadata=metadata)\n        info_path = osp.join(out_dir,\n                             '{}_infos_test.pkl'.format(info_prefix))\n        mmcv.dump(data, info_path)\n    else:\n        print('train sample: {}, val sample: {}'.format(\n            len(train_nusc_infos), len(val_nusc_infos)))\n        data = dict(infos=train_nusc_infos, metadata=metadata)\n        info_path = osp.join(out_dir,\n                             '{}_infos_train.pkl'.format(info_prefix))\n        mmcv.dump(data, info_path)\n        data['infos'] = val_nusc_infos\n        info_val_path = osp.join(out_dir,\n                                 '{}_infos_val.pkl'.format(info_prefix))\n        mmcv.dump(data, info_val_path)\n\n\ndef get_available_scenes(nusc):\n    \"\"\"Get available scenes from the input nuscenes class.\n    Given the raw data, get the information of available scenes for\n    further info generation.\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n    Returns:\n        available_scenes (list[dict]): List of basic information for the\n            available scenes.\n    \"\"\"\n    available_scenes = []\n    print('total scene num: {}'.format(len(nusc.scene)))\n    for scene in nusc.scene:\n        scene_token = scene['token']\n        scene_rec = nusc.get('scene', scene_token)\n        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])\n        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])\n        has_more_frames = True\n        scene_not_exist = False\n        while has_more_frames:\n            lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])\n            lidar_path = str(lidar_path)\n            if os.getcwd() in lidar_path:\n                # path from lyftdataset is absolute path\n                lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]\n                # relative path\n            if not mmcv.is_filepath(lidar_path):\n                scene_not_exist = True\n                break\n            else:\n                break\n        if scene_not_exist:\n            continue\n        available_scenes.append(scene)\n    print('exist scene num: {}'.format(len(available_scenes)))\n    return available_scenes\n\n\ndef _fill_trainval_infos(nusc,\n                         train_scenes,\n                         val_scenes,\n                         test=False,\n                         max_sweeps=10,\n                         forecasting=False,\n                         forecasting_length=13):\n    \"\"\"Generate the train/val infos from the raw data.\n    Args:\n        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.\n        train_scenes 
(list[str]): Basic information of training scenes.\n        val_scenes (list[str]): Basic information of validation scenes.\n        test (bool): Whether use the test mode. In the test mode, no\n            annotations can be accessed. Default: False.\n        max_sweeps (int): Max number of sweeps. Default: 10.\n        forecasting (bool): If prepare for forecasting data\n        forecasting_length (int): Max frame number for forecasting.\n            Default: 13 (6 seconds + current frame)\n    Returns:\n        tuple[list[dict]]: Information of training set and validation set\n            that will be saved to the info file.\n    \"\"\"\n    train_nusc_infos = []\n    val_nusc_infos = []\n\n    frame_idx = 0\n    for sample in mmcv.track_iter_progress(nusc.sample):\n        lidar_token = sample['data']['LIDAR_TOP']\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        cs_record = nusc.get('calibrated_sensor',\n                             sd_rec['calibrated_sensor_token'])\n        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)\n\n        mmcv.check_file_exist(lidar_path)\n\n        info = {\n            'lidar_path': lidar_path,\n            'token': sample['token'],\n            'sweeps': [],\n            'cams': dict(),\n            'radars': dict(),\n            'lidar2ego_translation': cs_record['translation'],\n            'lidar2ego_rotation': cs_record['rotation'],\n            'ego2global_translation': pose_record['translation'],\n            'ego2global_rotation': pose_record['rotation'],\n            'timestamp': sample['timestamp'],\n            'scene_token': sample['scene_token'],\n            'frame_idx': frame_idx\n        }\n\n        if sample['next'] == '':\n            frame_idx = 0\n        else:\n            frame_idx += 1\n\n        l2e_r = info['lidar2ego_rotation']\n        l2e_t = info['lidar2ego_translation']\n        e2g_r = info['ego2global_rotation']\n        e2g_t = info['ego2global_translation']\n        l2e_r_mat = Quaternion(l2e_r).rotation_matrix\n        e2g_r_mat = Quaternion(e2g_r).rotation_matrix\n\n        # obtain 6 image's information per frame\n        camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n        ]\n        for cam in camera_types:\n            cam_token = sample['data'][cam]\n            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)\n            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,\n                                         e2g_t, e2g_r_mat, cam)\n            cam_info.update(cam_intrinsic=cam_intrinsic)\n            info['cams'].update({cam: cam_info})\n\n        # obtain sweeps for a single key-frame\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        sweeps = []\n        while len(sweeps) < max_sweeps:\n            if not sd_rec['prev'] == '':\n                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,\n                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')\n                sweeps.append(sweep)\n                sd_rec = nusc.get('sample_data', sd_rec['prev'])\n            else:\n                break\n        info['sweeps'] = sweeps\n        # obtain annotation\n        if not test:\n            annotations = [\n                nusc.get('sample_annotation', token)\n                
for token in sample['anns']\n            ]\n            locs = np.array([b.center for b in boxes]).reshape(-1, 3)\n            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)\n            rots = np.array([b.orientation.yaw_pitch_roll[0]\n                             for b in boxes]).reshape(-1, 1)\n            velocity = np.array(\n                [nusc.box_velocity(token)[:2] for token in sample['anns']])\n            valid_flag = np.array(\n                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0\n                 for anno in annotations],\n                dtype=bool).reshape(-1)\n            # convert velo from global to lidar\n            for i in range(len(boxes)):\n                velo = np.array([*velocity[i], 0.0])\n                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(\n                    l2e_r_mat).T\n                velocity[i] = velo[:2]\n\n            names = [b.name for b in boxes]\n            for i in range(len(names)):\n                if names[i] in NuScenesDataset.NameMapping:\n                    names[i] = NuScenesDataset.NameMapping[names[i]]\n            names = np.array(names)\n            # update valid now\n            name_in_track = [_a in nus_categories for _a in names]\n            name_in_track = np.array(name_in_track)\n            valid_flag = np.logical_and(valid_flag, name_in_track)\n\n            # add instance_ids\n            instance_inds = [nusc.getind('instance', ann['instance_token']) for ann in annotations]\n            # we need to convert rot to SECOND format.\n            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)\n            assert len(gt_boxes) == len(\n                annotations), f'{len(gt_boxes)}, {len(annotations)}'\n            info['gt_boxes'] = gt_boxes\n            info['gt_names'] = names\n            info['gt_velocity'] = velocity.reshape(-1, 2)\n            info['num_lidar_pts'] = np.array(\n                [a['num_lidar_pts'] for a in annotations])\n            info['num_radar_pts'] = np.array(\n                [a['num_radar_pts'] for a in annotations])\n            info['valid_flag'] = valid_flag\n            info['instance_inds'] = instance_inds\n\n            if forecasting:\n                fboxes, fannotations, fmasks, ftypes = get_forecasting_annotations(nusc, annotations, forecasting_length)\n                locs = [np.array([b.center for b in boxes]).reshape(-1, 3) for boxes in fboxes]\n                tokens = [np.array([b.token for b in boxes]) for boxes in fboxes]\n                info['forecasting_locs'] = np.array(locs)\n                info['forecasting_tokens'] = np.array(tokens)\n                info['forecasting_masks'] = np.array(fmasks)\n                info['forecasting_types'] = np.array(ftypes)\n\n        if sample['scene_token'] in train_scenes:\n            train_nusc_infos.append(info)\n        else:\n            val_nusc_infos.append(info)\n\n    return train_nusc_infos, val_nusc_infos\n\n\ndef obtain_sensor2top(nusc,\n                      sensor_token,\n                      l2e_t,\n                      l2e_r_mat,\n                      e2g_t,\n                      e2g_r_mat,\n                      sensor_type='lidar'):\n    \"\"\"Obtain the info with RT matric from general sensor to Top LiDAR.\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n        sensor_token (str): Sample data token corresponding to the\n            specific sensor type.\n        l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 
3).\n        l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego\n            in shape (3, 3).\n        e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).\n        e2g_r_mat (np.ndarray): Rotation matrix from ego to global\n            in shape (3, 3).\n        sensor_type (str): Sensor to calibrate. Default: 'lidar'.\n    Returns:\n        sweep (dict): Sweep information after transformation.\n    \"\"\"\n    sd_rec = nusc.get('sample_data', sensor_token)\n    cs_record = nusc.get('calibrated_sensor',\n                         sd_rec['calibrated_sensor_token'])\n    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n    data_path = str(nusc.get_sample_data_path(sd_rec['token']))\n    if os.getcwd() in data_path:  # path from lyftdataset is absolute path\n        data_path = data_path.split(f'{os.getcwd()}/')[-1]  # relative path\n    sweep = {\n        'data_path': data_path,\n        'type': sensor_type,\n        'sample_data_token': sd_rec['token'],\n        'sensor2ego_translation': cs_record['translation'],\n        'sensor2ego_rotation': cs_record['rotation'],\n        'ego2global_translation': pose_record['translation'],\n        'ego2global_rotation': pose_record['rotation'],\n        'timestamp': sd_rec['timestamp']\n    }\n    l2e_r_s = sweep['sensor2ego_rotation']\n    l2e_t_s = sweep['sensor2ego_translation']\n    e2g_r_s = sweep['ego2global_rotation']\n    e2g_t_s = sweep['ego2global_translation']\n\n    # obtain the RT from sensor to Top LiDAR\n    # sweep->ego->global->ego'->lidar\n    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix\n    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix\n    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (\n        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)\n    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (\n        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)\n    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T\n                  ) + l2e_t @ np.linalg.inv(l2e_r_mat).T\n    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T\n    sweep['sensor2lidar_translation'] = T\n    return sweep\n\n\ndef export_2d_annotation(root_path, info_path, version, mono3d=True):\n    \"\"\"Export 2d annotation from the info file and raw data.\n    Args:\n        root_path (str): Root path of the raw data.\n        info_path (str): Path of the info file.\n        version (str): Dataset version.\n        mono3d (bool): Whether to export mono3d annotation. 
Default: True.\n    \"\"\"\n    # get bbox annotations for camera\n    camera_types = [\n        'CAM_FRONT',\n        'CAM_FRONT_RIGHT',\n        'CAM_FRONT_LEFT',\n        'CAM_BACK',\n        'CAM_BACK_LEFT',\n        'CAM_BACK_RIGHT',\n    ]\n    nusc_infos = mmcv.load(info_path)['infos']\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    # info_2d_list = []\n    cat2Ids = [\n        dict(id=nus_categories.index(cat_name), name=cat_name)\n        for cat_name in nus_categories\n    ]\n    coco_ann_id = 0\n    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)\n    for info in mmcv.track_iter_progress(nusc_infos):\n        for cam in camera_types:\n            cam_info = info['cams'][cam]\n            coco_infos = get_2d_boxes(\n                nusc,\n                cam_info['sample_data_token'],\n                visibilities=['', '1', '2', '3', '4'],\n                mono3d=mono3d)\n            (height, width, _) = mmcv.imread(cam_info['data_path']).shape\n            coco_2d_dict['images'].append(\n                dict(\n                    file_name=cam_info['data_path'].split('data/nuscenes/')\n                    [-1],\n                    id=cam_info['sample_data_token'],\n                    token=info['token'],\n                    cam2ego_rotation=cam_info['sensor2ego_rotation'],\n                    cam2ego_translation=cam_info['sensor2ego_translation'],\n                    ego2global_rotation=info['ego2global_rotation'],\n                    ego2global_translation=info['ego2global_translation'],\n                    cam_intrinsic=cam_info['cam_intrinsic'],\n                    width=width,\n                    height=height))\n            for coco_info in coco_infos:\n                if coco_info is None:\n                    continue\n                # add an empty key for coco format\n                coco_info['segmentation'] = []\n                coco_info['id'] = coco_ann_id\n                coco_2d_dict['annotations'].append(coco_info)\n                coco_ann_id += 1\n    if mono3d:\n        json_prefix = f'{info_path[:-4]}_mono3d'\n    else:\n        json_prefix = f'{info_path[:-4]}'\n    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')\n\n\ndef get_2d_boxes(nusc,\n                 sample_data_token: str,\n                 visibilities: List[str],\n                 mono3d=True):\n    \"\"\"Get the 2D annotation records for a given `sample_data_token`.\n    Args:\n        sample_data_token (str): Sample data token belonging to a camera \\\n            keyframe.\n        visibilities (list[str]): Visibility filter.\n        mono3d (bool): Whether to get boxes with mono3d annotation.\n    Return:\n        list[dict]: List of 2D annotation record that belongs to the input\n            `sample_data_token`.\n    \"\"\"\n\n    # Get the sample data and the sample corresponding to that sample data.\n    sd_rec = nusc.get('sample_data', sample_data_token)\n\n    assert sd_rec[\n        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \\\n        ' for camera sample_data!'\n    if not sd_rec['is_key_frame']:\n        raise ValueError(\n            'The 2D re-projections are available only for keyframes.')\n\n    s_rec = nusc.get('sample', sd_rec['sample_token'])\n\n    # Get the calibrated sensor and ego pose\n    # record to get the transformation matrices.\n    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])\n    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n    
camera_intrinsic = np.array(cs_rec['camera_intrinsic'])\n\n    # Get all the annotation with the specified visibilties.\n    ann_recs = [\n        nusc.get('sample_annotation', token) for token in s_rec['anns']\n    ]\n    ann_recs = [\n        ann_rec for ann_rec in ann_recs\n        if (ann_rec['visibility_token'] in visibilities)\n    ]\n\n    repro_recs = []\n\n    for ann_rec in ann_recs:\n        # Augment sample_annotation with token information.\n        ann_rec['sample_annotation_token'] = ann_rec['token']\n        ann_rec['sample_data_token'] = sample_data_token\n\n        # Get the box in global coordinates.\n        box = nusc.get_box(ann_rec['token'])\n\n        # Move them to the ego-pose frame.\n        box.translate(-np.array(pose_rec['translation']))\n        box.rotate(Quaternion(pose_rec['rotation']).inverse)\n\n        # Move them to the calibrated sensor frame.\n        box.translate(-np.array(cs_rec['translation']))\n        box.rotate(Quaternion(cs_rec['rotation']).inverse)\n\n        # Filter out the corners that are not in front of the calibrated\n        # sensor.\n        corners_3d = box.corners()\n        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()\n        corners_3d = corners_3d[:, in_front]\n\n        # Project 3d box to 2d.\n        corner_coords = view_points(corners_3d, camera_intrinsic,\n                                    True).T[:, :2].tolist()\n\n        # Keep only corners that fall within the image.\n        final_coords = post_process_coords(corner_coords)\n\n        # Skip if the convex hull of the re-projected corners\n        # does not intersect the image canvas.\n        if final_coords is None:\n            continue\n        else:\n            min_x, min_y, max_x, max_y = final_coords\n\n        # Generate dictionary record to be included in the .json file.\n        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,\n                                    sample_data_token, sd_rec['filename'])\n\n        # If mono3d=True, add 3D annotations in camera coordinates\n        if mono3d and (repro_rec is not None):\n            loc = box.center.tolist()\n            dim = box.wlh.tolist()\n            rot = [box.orientation.yaw_pitch_roll[0]]\n\n            global_velo2d = nusc.box_velocity(box.token)[:2]\n            global_velo3d = np.array([*global_velo2d, 0.0])\n            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix\n            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix\n            cam_velo3d = global_velo3d @ np.linalg.inv(\n                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T\n            velo = cam_velo3d[0::2].tolist()\n\n            repro_rec['bbox_cam3d'] = loc + dim + rot\n            repro_rec['velo_cam3d'] = velo\n\n            center3d = np.array(loc).reshape([1, 3])\n            center2d = points_cam2img(\n                center3d, camera_intrinsic, with_depth=True)\n            repro_rec['center2d'] = center2d.squeeze().tolist()\n            # normalized center2D + depth\n            # if samples with depth < 0 will be removed\n            if repro_rec['center2d'][2] <= 0:\n                continue\n\n            ann_token = nusc.get('sample_annotation',\n                                 box.token)['attribute_tokens']\n            if len(ann_token) == 0:\n                attr_name = 'None'\n            else:\n                attr_name = nusc.get('attribute', ann_token[0])['name']\n            attr_id = nus_attributes.index(attr_name)\n            
repro_rec['attribute_name'] = attr_name\n            repro_rec['attribute_id'] = attr_id\n\n        repro_recs.append(repro_rec)\n\n    return repro_recs\n\n\ndef post_process_coords(\n    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)\n) -> Union[Tuple[float, float, float, float], None]:\n    \"\"\"Get the intersection of the convex hull of the reprojected bbox corners\n    and the image canvas, return None if no intersection.\n    Args:\n        corner_coords (list[int]): Corner coordinates of reprojected\n            bounding box.\n        imsize (tuple[int]): Size of the image canvas.\n    Return:\n        tuple [float]: Intersection of the convex hull of the 2D box\n            corners and the image canvas.\n    \"\"\"\n    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull\n    img_canvas = box(0, 0, imsize[0], imsize[1])\n\n    if polygon_from_2d_box.intersects(img_canvas):\n        img_intersection = polygon_from_2d_box.intersection(img_canvas)\n        intersection_coords = np.array(\n            [coord for coord in img_intersection.exterior.coords])\n\n        min_x = min(intersection_coords[:, 0])\n        min_y = min(intersection_coords[:, 1])\n        max_x = max(intersection_coords[:, 0])\n        max_y = max(intersection_coords[:, 1])\n\n        return min_x, min_y, max_x, max_y\n    else:\n        return None\n\n\ndef generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,\n                    sample_data_token: str, filename: str) -> OrderedDict:\n    \"\"\"Generate one 2D annotation record given various informations on top of\n    the 2D bounding box coordinates.\n    Args:\n        ann_rec (dict): Original 3d annotation record.\n        x1 (float): Minimum value of the x coordinate.\n        y1 (float): Minimum value of the y coordinate.\n        x2 (float): Maximum value of the x coordinate.\n        y2 (float): Maximum value of the y coordinate.\n        sample_data_token (str): Sample data token.\n        filename (str):The corresponding image file where the annotation\n            is present.\n    Returns:\n        dict: A sample 2D annotation record.\n            - file_name (str): flie name\n            - image_id (str): sample data token\n            - area (float): 2d box area\n            - category_name (str): category name\n            - category_id (int): category id\n            - bbox (list[float]): left x, top y, dx, dy of 2d box\n            - iscrowd (int): whether the area is crowd\n    \"\"\"\n    repro_rec = OrderedDict()\n    repro_rec['sample_data_token'] = sample_data_token\n    coco_rec = dict()\n\n    relevant_keys = [\n        'attribute_tokens',\n        'category_name',\n        'instance_token',\n        'next',\n        'num_lidar_pts',\n        'num_radar_pts',\n        'prev',\n        'sample_annotation_token',\n        'sample_data_token',\n        'visibility_token',\n    ]\n\n    for key, value in ann_rec.items():\n        if key in relevant_keys:\n            repro_rec[key] = value\n\n    repro_rec['bbox_corners'] = [x1, y1, x2, y2]\n    repro_rec['filename'] = filename\n\n    coco_rec['file_name'] = filename\n    coco_rec['image_id'] = sample_data_token\n    coco_rec['area'] = (y2 - y1) * (x2 - x1)\n\n    if repro_rec['category_name'] not in NuScenesDataset.NameMapping:\n        return None\n    cat_name = NuScenesDataset.NameMapping[repro_rec['category_name']]\n    coco_rec['category_name'] = cat_name\n    coco_rec['category_id'] = nus_categories.index(cat_name)\n    coco_rec['bbox'] = [x1, 
y1, x2 - x1, y2 - y1]\n    coco_rec['iscrowd'] = 0\n\n    return coco_rec\n\n\nif __name__ == '__main__':\n\n    # generate .pkl files for the train and val splits\n    # (create_nuscenes_infos requires root_path, out_dir and info_prefix)\n    create_nuscenes_infos('data/nuscenes/', 'data/nuscenes/', 'track')\n\n    # generate .pkl file for the test set\n    # create_nuscenes_infos('data/nuscenes/', 'data/nuscenes/', 'track_test', version='v1.0-test')"
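Assuming the converter above has been run, the generated info file can be inspected as sketched below. The path follows the '{info_prefix}_infos_train.pkl' pattern used in create_nuscenes_infos, and the listed keys mirror what _fill_trainval_infos stores; the forecasting arrays exist only when forecasting=True.

import mmcv

data = mmcv.load('data/nuscenes/track_infos_train.pkl')
print(data['metadata'])            # {'version': 'v1.0-trainval'}
info = data['infos'][0]
print(info['token'], info['frame_idx'], len(info['sweeps']))
print(info['gt_boxes'].shape)      # (num_boxes, 7): x, y, z, w, l, h, yaw in SECOND convention
print(info['gt_velocity'].shape)   # (num_boxes, 2), velocities already rotated into the lidar frame
# only present when forecasting=True:
# info['forecasting_locs'] has shape (num_boxes, forecasting_length, 3)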
  },
  {
    "path": "tools/data_converter/s3dis_data_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nfrom concurrent import futures as futures\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\n\n\nclass S3DISData(object):\n    \"\"\"S3DIS data.\n\n    Generate s3dis infos for s3dis_converter.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        split (str, optional): Set split type of the data. Default: 'Area_1'.\n    \"\"\"\n\n    def __init__(self, root_path, split='Area_1'):\n        self.root_dir = root_path\n        self.split = split\n        self.data_dir = osp.join(root_path,\n                                 'Stanford3dDataset_v1.2_Aligned_Version')\n\n        # Following `GSDN <https://arxiv.org/abs/2006.12356>`_, use 5 furniture\n        # classes for detection: table, chair, sofa, bookcase, board.\n        self.cat_ids = np.array([7, 8, 9, 10, 11])\n        self.cat_ids2class = {\n            cat_id: i\n            for i, cat_id in enumerate(list(self.cat_ids))\n        }\n\n        assert split in [\n            'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6'\n        ]\n        self.sample_id_list = os.listdir(osp.join(self.data_dir,\n                                                  split))  # conferenceRoom_1\n        for sample_id in self.sample_id_list:\n            if os.path.isfile(osp.join(self.data_dir, split, sample_id)):\n                self.sample_id_list.remove(sample_id)\n\n    def __len__(self):\n        return len(self.sample_id_list)\n\n    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):\n        \"\"\"Get data infos.\n\n        This method gets information from the raw data.\n\n        Args:\n            num_workers (int, optional): Number of threads to be used.\n                Default: 4.\n            has_label (bool, optional): Whether the data has label.\n                Default: True.\n            sample_id_list (list[int], optional): Index list of the sample.\n                Default: None.\n\n        Returns:\n            infos (list[dict]): Information of the raw data.\n        \"\"\"\n\n        def process_single_scene(sample_idx):\n            print(f'{self.split} sample_idx: {sample_idx}')\n            info = dict()\n            pc_info = {\n                'num_features': 6,\n                'lidar_idx': f'{self.split}_{sample_idx}'\n            }\n            info['point_cloud'] = pc_info\n            pts_filename = osp.join(self.root_dir, 's3dis_data',\n                                    f'{self.split}_{sample_idx}_point.npy')\n            pts_instance_mask_path = osp.join(\n                self.root_dir, 's3dis_data',\n                f'{self.split}_{sample_idx}_ins_label.npy')\n            pts_semantic_mask_path = osp.join(\n                self.root_dir, 's3dis_data',\n                f'{self.split}_{sample_idx}_sem_label.npy')\n\n            points = np.load(pts_filename).astype(np.float32)\n            pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int)\n            pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int)\n\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))\n\n            points.tofile(\n                osp.join(self.root_dir, 'points',\n                         f'{self.split}_{sample_idx}.bin'))\n            pts_instance_mask.tofile(\n                osp.join(self.root_dir, 
'instance_mask',\n                         f'{self.split}_{sample_idx}.bin'))\n            pts_semantic_mask.tofile(\n                osp.join(self.root_dir, 'semantic_mask',\n                         f'{self.split}_{sample_idx}.bin'))\n\n            info['pts_path'] = osp.join('points',\n                                        f'{self.split}_{sample_idx}.bin')\n            info['pts_instance_mask_path'] = osp.join(\n                'instance_mask', f'{self.split}_{sample_idx}.bin')\n            info['pts_semantic_mask_path'] = osp.join(\n                'semantic_mask', f'{self.split}_{sample_idx}.bin')\n            info['annos'] = self.get_bboxes(points, pts_instance_mask,\n                                            pts_semantic_mask)\n\n            return info\n\n        sample_id_list = sample_id_list if sample_id_list is not None \\\n            else self.sample_id_list\n        with futures.ThreadPoolExecutor(num_workers) as executor:\n            infos = executor.map(process_single_scene, sample_id_list)\n        return list(infos)\n\n    def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask):\n        \"\"\"Convert instance masks to axis-aligned bounding boxes.\n\n        Args:\n            points (np.array): Scene points of shape (n, 6).\n            pts_instance_mask (np.ndarray): Instance labels of shape (n,).\n            pts_semantic_mask (np.ndarray): Semantic labels of shape (n,).\n\n        Returns:\n            dict: A dict containing detection infos with following keys:\n\n                - gt_boxes_upright_depth (np.ndarray): Bounding boxes\n                    of shape (n, 6)\n                - class (np.ndarray): Box labels of shape (n,)\n                - gt_num (int): Number of boxes.\n        \"\"\"\n        bboxes, labels = [], []\n        for i in range(1, pts_instance_mask.max() + 1):\n            ids = pts_instance_mask == i\n            # check if all instance points have same semantic label\n            assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max()\n            label = pts_semantic_mask[ids][0]\n            # keep only furniture objects\n            if label in self.cat_ids2class:\n                labels.append(self.cat_ids2class[pts_semantic_mask[ids][0]])\n                pts = points[:, :3][ids]\n                min_pts = pts.min(axis=0)\n                max_pts = pts.max(axis=0)\n                locations = (min_pts + max_pts) / 2\n                dimensions = max_pts - min_pts\n                bboxes.append(np.concatenate((locations, dimensions)))\n        annotation = dict()\n        # follow ScanNet and SUN RGB-D keys\n        annotation['gt_boxes_upright_depth'] = np.array(bboxes)\n        annotation['class'] = np.array(labels)\n        annotation['gt_num'] = len(labels)\n        return annotation\n\n\nclass S3DISSegData(object):\n    \"\"\"S3DIS dataset used to generate infos for semantic segmentation task.\n\n    Args:\n        data_root (str): Root path of the raw data.\n        ann_file (str): The generated scannet infos.\n        split (str, optional): Set split type of the data. Default: 'train'.\n        num_points (int, optional): Number of points in each data input.\n            Default: 8192.\n        label_weight_func (function, optional): Function to compute the\n            label weight. 
Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 split='Area_1',\n                 num_points=4096,\n                 label_weight_func=None):\n        self.data_root = data_root\n        self.data_infos = mmcv.load(ann_file)\n        self.split = split\n        self.num_points = num_points\n\n        self.all_ids = np.arange(13)  # all possible ids\n        self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,\n                                 12])  # used for seg task\n        self.ignore_index = len(self.cat_ids)\n\n        self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int) * \\\n            self.ignore_index\n        for i, cat_id in enumerate(self.cat_ids):\n            self.cat_id2class[cat_id] = i\n\n        # label weighting function is taken from\n        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24\n        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \\\n            label_weight_func is None else label_weight_func\n\n    def get_seg_infos(self):\n        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()\n        save_folder = osp.join(self.data_root, 'seg_info')\n        mmcv.mkdir_or_exist(save_folder)\n        np.save(\n            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),\n            scene_idxs)\n        np.save(\n            osp.join(save_folder, f'{self.split}_label_weight.npy'),\n            label_weight)\n        print(f'{self.split} resampled scene index and label weight saved')\n\n    def _convert_to_label(self, mask):\n        \"\"\"Convert class_id in loaded segmentation mask to label.\"\"\"\n        if isinstance(mask, str):\n            if mask.endswith('npy'):\n                mask = np.load(mask)\n            else:\n                mask = np.fromfile(mask, dtype=np.int64)\n        label = self.cat_id2class[mask]\n        return label\n\n    def get_scene_idxs_and_label_weight(self):\n        \"\"\"Compute scene_idxs for data sampling and label weight for loss\n        calculation.\n\n        We sample more times for scenes with more points. Label_weight is\n        inversely proportional to number of class points.\n        \"\"\"\n        num_classes = len(self.cat_ids)\n        num_point_all = []\n        label_weight = np.zeros((num_classes + 1, ))  # ignore_index\n        for data_info in self.data_infos:\n            label = self._convert_to_label(\n                osp.join(self.data_root, data_info['pts_semantic_mask_path']))\n            num_point_all.append(label.shape[0])\n            class_count, _ = np.histogram(label, range(num_classes + 2))\n            label_weight += class_count\n\n        # repeat scene_idx for num_scene_point // num_sample_point times\n        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))\n        num_iter = int(np.sum(num_point_all) / float(self.num_points))\n        scene_idxs = []\n        for idx in range(len(self.data_infos)):\n            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))\n        scene_idxs = np.array(scene_idxs).astype(np.int32)\n\n        # calculate label weight, adopted from PointNet++\n        label_weight = label_weight[:-1].astype(np.float32)\n        label_weight = label_weight / label_weight.sum()\n        label_weight = self.label_weight_func(label_weight).astype(np.float32)\n\n        return scene_idxs, label_weight\n"
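get_scene_idxs_and_label_weight above resamples scenes in proportion to their point count (so each num_points-sized chunk is drawn roughly once per epoch) and weights classes by the inverse log of their normalized frequency. A small sketch of the same arithmetic with invented point and class counts:

import numpy as np

num_points = 4096                                         # points per training sample
num_point_all = np.array([80_000, 20_000, 100_000])       # invented per-scene point counts
class_count = np.array([150_000.0, 40_000.0, 10_000.0])   # invented per-class point counts

# scene resampling: each scene index is repeated in proportion to its share of points
sample_prob = num_point_all / num_point_all.sum()
num_iter = int(num_point_all.sum() / num_points)
scene_idxs = np.concatenate([
    np.full(int(round(p * num_iter)), idx) for idx, p in enumerate(sample_prob)
]).astype(np.int32)
print(np.bincount(scene_idxs))     # [19  5 24] for the counts above

# label weight: inverse-log of the normalized class frequency (PointNet++ heuristic)
freq = class_count / class_count.sum()
label_weight = (1.0 / np.log(1.2 + freq)).astype(np.float32)
print(label_weight)                # rarer classes get larger weights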
  },
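The axis-aligned boxes in S3DISData.get_bboxes come straight from per-instance point extents: the location is the midpoint of the per-axis min/max and the dimensions are their difference. An illustrative sketch with a synthetic instance (no S3DIS data needed):

import numpy as np

# fake instance: 100 points scattered inside a 1.2 x 0.8 x 0.7 m block centred at (2, 3, 0.35)
rng = np.random.default_rng(42)
pts = rng.uniform(low=[1.4, 2.6, 0.0], high=[2.6, 3.4, 0.7], size=(100, 3))

min_pts, max_pts = pts.min(axis=0), pts.max(axis=0)
location = (min_pts + max_pts) / 2      # box centre
dimensions = max_pts - min_pts          # box extent along x, y, z
bbox = np.concatenate((location, dimensions))
print(bbox)  # approximately [2.0, 3.0, 0.35, 1.2, 0.8, 0.7]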
  {
    "path": "tools/data_converter/scannet_data_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nfrom concurrent import futures as futures\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\n\n\nclass ScanNetData(object):\n    \"\"\"ScanNet data.\n\n    Generate scannet infos for scannet_converter.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        split (str, optional): Set split type of the data. Default: 'train'.\n    \"\"\"\n\n    def __init__(self, root_path, split='train'):\n        self.root_dir = root_path\n        self.split = split\n        self.split_dir = osp.join(root_path)\n        self.classes = [\n            'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n            'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n            'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n            'garbagebin'\n        ]\n        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}\n        self.label2cat = {self.cat2label[t]: t for t in self.cat2label}\n        self.cat_ids = np.array(\n            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])\n        self.cat_ids2class = {\n            nyu40id: i\n            for i, nyu40id in enumerate(list(self.cat_ids))\n        }\n        assert split in ['train', 'val', 'test']\n        split_file = osp.join(self.root_dir, 'meta_data',\n                              f'scannetv2_{split}.txt')\n        mmcv.check_file_exist(split_file)\n        self.sample_id_list = mmcv.list_from_file(split_file)\n        self.test_mode = (split == 'test')\n\n    def __len__(self):\n        return len(self.sample_id_list)\n\n    def get_aligned_box_label(self, idx):\n        box_file = osp.join(self.root_dir, 'scannet_instance_data',\n                            f'{idx}_aligned_bbox.npy')\n        mmcv.check_file_exist(box_file)\n        return np.load(box_file)\n\n    def get_unaligned_box_label(self, idx):\n        box_file = osp.join(self.root_dir, 'scannet_instance_data',\n                            f'{idx}_unaligned_bbox.npy')\n        mmcv.check_file_exist(box_file)\n        return np.load(box_file)\n\n    def get_axis_align_matrix(self, idx):\n        matrix_file = osp.join(self.root_dir, 'scannet_instance_data',\n                               f'{idx}_axis_align_matrix.npy')\n        mmcv.check_file_exist(matrix_file)\n        return np.load(matrix_file)\n\n    def get_images(self, idx):\n        paths = []\n        path = osp.join(self.root_dir, 'posed_images', idx)\n        for file in sorted(os.listdir(path)):\n            if file.endswith('.jpg'):\n                paths.append(osp.join('posed_images', idx, file))\n        return paths\n\n    def get_extrinsics(self, idx):\n        extrinsics = []\n        path = osp.join(self.root_dir, 'posed_images', idx)\n        for file in sorted(os.listdir(path)):\n            if file.endswith('.txt') and not file == 'intrinsic.txt':\n                extrinsics.append(np.loadtxt(osp.join(path, file)))\n        return extrinsics\n\n    def get_intrinsics(self, idx):\n        matrix_file = osp.join(self.root_dir, 'posed_images', idx,\n                               'intrinsic.txt')\n        mmcv.check_file_exist(matrix_file)\n        return np.loadtxt(matrix_file)\n\n    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):\n        \"\"\"Get data infos.\n\n        This method gets information from the raw data.\n\n        Args:\n            num_workers (int, optional): Number of threads to be used.\n   
             Default: 4.\n            has_label (bool, optional): Whether the data has label.\n                Default: True.\n            sample_id_list (list[int], optional): Index list of the sample.\n                Default: None.\n\n        Returns:\n            infos (list[dict]): Information of the raw data.\n        \"\"\"\n\n        def process_single_scene(sample_idx):\n            print(f'{self.split} sample_idx: {sample_idx}')\n            info = dict()\n            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}\n            info['point_cloud'] = pc_info\n            pts_filename = osp.join(self.root_dir, 'scannet_instance_data',\n                                    f'{sample_idx}_vert.npy')\n            points = np.load(pts_filename)\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))\n            points.tofile(\n                osp.join(self.root_dir, 'points', f'{sample_idx}.bin'))\n            info['pts_path'] = osp.join('points', f'{sample_idx}.bin')\n\n            # update with RGB image paths if exist\n            if os.path.exists(osp.join(self.root_dir, 'posed_images')):\n                info['intrinsics'] = self.get_intrinsics(sample_idx)\n                all_extrinsics = self.get_extrinsics(sample_idx)\n                all_img_paths = self.get_images(sample_idx)\n                # some poses in ScanNet are invalid\n                extrinsics, img_paths = [], []\n                for extrinsic, img_path in zip(all_extrinsics, all_img_paths):\n                    if np.all(np.isfinite(extrinsic)):\n                        img_paths.append(img_path)\n                        extrinsics.append(extrinsic)\n                info['extrinsics'] = extrinsics\n                info['img_paths'] = img_paths\n\n            if not self.test_mode:\n                pts_instance_mask_path = osp.join(\n                    self.root_dir, 'scannet_instance_data',\n                    f'{sample_idx}_ins_label.npy')\n                pts_semantic_mask_path = osp.join(\n                    self.root_dir, 'scannet_instance_data',\n                    f'{sample_idx}_sem_label.npy')\n\n                pts_instance_mask = np.load(pts_instance_mask_path).astype(\n                    np.int64)\n                pts_semantic_mask = np.load(pts_semantic_mask_path).astype(\n                    np.int64)\n\n                mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))\n                mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))\n\n                pts_instance_mask.tofile(\n                    osp.join(self.root_dir, 'instance_mask',\n                             f'{sample_idx}.bin'))\n                pts_semantic_mask.tofile(\n                    osp.join(self.root_dir, 'semantic_mask',\n                             f'{sample_idx}.bin'))\n\n                info['pts_instance_mask_path'] = osp.join(\n                    'instance_mask', f'{sample_idx}.bin')\n                info['pts_semantic_mask_path'] = osp.join(\n                    'semantic_mask', f'{sample_idx}.bin')\n\n            if has_label:\n                annotations = {}\n                # box is of shape [k, 6 + class]\n                aligned_box_label = self.get_aligned_box_label(sample_idx)\n                unaligned_box_label = self.get_unaligned_box_label(sample_idx)\n                annotations['gt_num'] = aligned_box_label.shape[0]\n                if annotations['gt_num'] != 0:\n                    aligned_box = aligned_box_label[:, :-1]  # k, 6\n                    
unaligned_box = unaligned_box_label[:, :-1]\n                    classes = aligned_box_label[:, -1]  # k\n                    annotations['name'] = np.array([\n                        self.label2cat[self.cat_ids2class[classes[i]]]\n                        for i in range(annotations['gt_num'])\n                    ])\n                    # default names are given to aligned bbox for compatibility\n                    # we also save unaligned bbox info with marked names\n                    annotations['location'] = aligned_box[:, :3]\n                    annotations['dimensions'] = aligned_box[:, 3:6]\n                    annotations['gt_boxes_upright_depth'] = aligned_box\n                    annotations['unaligned_location'] = unaligned_box[:, :3]\n                    annotations['unaligned_dimensions'] = unaligned_box[:, 3:6]\n                    annotations[\n                        'unaligned_gt_boxes_upright_depth'] = unaligned_box\n                    annotations['index'] = np.arange(\n                        annotations['gt_num'], dtype=np.int32)\n                    annotations['class'] = np.array([\n                        self.cat_ids2class[classes[i]]\n                        for i in range(annotations['gt_num'])\n                    ])\n                axis_align_matrix = self.get_axis_align_matrix(sample_idx)\n                annotations['axis_align_matrix'] = axis_align_matrix  # 4x4\n                info['annos'] = annotations\n            return info\n\n        sample_id_list = sample_id_list if sample_id_list is not None \\\n            else self.sample_id_list\n        with futures.ThreadPoolExecutor(num_workers) as executor:\n            infos = executor.map(process_single_scene, sample_id_list)\n        return list(infos)\n\n\nclass ScanNetSegData(object):\n    \"\"\"ScanNet dataset used to generate infos for semantic segmentation task.\n\n    Args:\n        data_root (str): Root path of the raw data.\n        ann_file (str): The generated scannet infos.\n        split (str, optional): Set split type of the data. Default: 'train'.\n        num_points (int, optional): Number of points in each data input.\n            Default: 8192.\n        label_weight_func (function, optional): Function to compute the\n            label weight. 
Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 split='train',\n                 num_points=8192,\n                 label_weight_func=None):\n        self.data_root = data_root\n        self.data_infos = mmcv.load(ann_file)\n        self.split = split\n        assert split in ['train', 'val', 'test']\n        self.num_points = num_points\n\n        self.all_ids = np.arange(41)  # all possible ids\n        self.cat_ids = np.array([\n            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,\n            39\n        ])  # used for seg task\n        self.ignore_index = len(self.cat_ids)\n\n        self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int) * \\\n            self.ignore_index\n        for i, cat_id in enumerate(self.cat_ids):\n            self.cat_id2class[cat_id] = i\n\n        # label weighting function is taken from\n        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24\n        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \\\n            label_weight_func is None else label_weight_func\n\n    def get_seg_infos(self):\n        if self.split == 'test':\n            return\n        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()\n        save_folder = osp.join(self.data_root, 'seg_info')\n        mmcv.mkdir_or_exist(save_folder)\n        np.save(\n            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),\n            scene_idxs)\n        np.save(\n            osp.join(save_folder, f'{self.split}_label_weight.npy'),\n            label_weight)\n        print(f'{self.split} resampled scene index and label weight saved')\n\n    def _convert_to_label(self, mask):\n        \"\"\"Convert class_id in loaded segmentation mask to label.\"\"\"\n        if isinstance(mask, str):\n            if mask.endswith('npy'):\n                mask = np.load(mask)\n            else:\n                mask = np.fromfile(mask, dtype=np.int64)\n        label = self.cat_id2class[mask]\n        return label\n\n    def get_scene_idxs_and_label_weight(self):\n        \"\"\"Compute scene_idxs for data sampling and label weight for loss\n        calculation.\n\n        We sample more times for scenes with more points. 
Label_weight is\n        inversely proportional to number of class points.\n        \"\"\"\n        num_classes = len(self.cat_ids)\n        num_point_all = []\n        label_weight = np.zeros((num_classes + 1, ))  # ignore_index\n        for data_info in self.data_infos:\n            label = self._convert_to_label(\n                osp.join(self.data_root, data_info['pts_semantic_mask_path']))\n            num_point_all.append(label.shape[0])\n            class_count, _ = np.histogram(label, range(num_classes + 2))\n            label_weight += class_count\n\n        # repeat scene_idx for num_scene_point // num_sample_point times\n        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))\n        num_iter = int(np.sum(num_point_all) / float(self.num_points))\n        scene_idxs = []\n        for idx in range(len(self.data_infos)):\n            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))\n        scene_idxs = np.array(scene_idxs).astype(np.int32)\n\n        # calculate label weight, adopted from PointNet++\n        label_weight = label_weight[:-1].astype(np.float32)\n        label_weight = label_weight / label_weight.sum()\n        label_weight = self.label_weight_func(label_weight).astype(np.float32)\n\n        return scene_idxs, label_weight\n"
  },
  {
    "path": "tools/data_converter/sunrgbd_data_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom concurrent import futures as futures\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\nfrom scipy import io as sio\n\n\ndef random_sampling(points, num_points, replace=None):\n    \"\"\"Random sampling.\n\n    Sampling point cloud to a certain number of points.\n\n    Args:\n        points (ndarray): Point cloud.\n        num_points (int): The number of samples.\n        replace (bool): Whether the sample is with or without replacement.\n\n    Returns:\n        points (ndarray): Point cloud after sampling.\n    \"\"\"\n    if num_points < 0:\n        return points\n    if replace is None:\n        replace = (points.shape[0] < num_points)\n    choices = np.random.choice(points.shape[0], num_points, replace=replace)\n    return points[choices]\n\n\nclass SUNRGBDInstance(object):\n\n    def __init__(self, line):\n        data = line.split(' ')\n        data[1:] = [float(x) for x in data[1:]]\n        self.classname = data[0]\n        self.xmin = data[1]\n        self.ymin = data[2]\n        self.xmax = data[1] + data[3]\n        self.ymax = data[2] + data[4]\n        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])\n        self.centroid = np.array([data[5], data[6], data[7]])\n        self.width = data[8]\n        self.length = data[9]\n        self.height = data[10]\n        # data[9] is x_size (length), data[8] is y_size (width), data[10] is\n        # z_size (height) in our depth coordinate system,\n        # l corresponds to the size along the x axis\n        self.size = np.array([data[9], data[8], data[10]]) * 2\n        self.orientation = np.zeros((3, ))\n        self.orientation[0] = data[11]\n        self.orientation[1] = data[12]\n        self.heading_angle = np.arctan2(self.orientation[1],\n                                        self.orientation[0])\n        self.box3d = np.concatenate(\n            [self.centroid, self.size, self.heading_angle[None]])\n\n\nclass SUNRGBDData(object):\n    \"\"\"SUNRGBD data.\n\n    Generate scannet infos for sunrgbd_converter.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        split (str, optional): Set split type of the data. Default: 'train'.\n        use_v1 (bool, optional): Whether to use v1. Default: False.\n        num_points (int, optional): Number of points to sample. Set to -1\n            to utilize all points. 
Defaults to -1.\n    \"\"\"\n\n    def __init__(self, root_path, split='train', use_v1=False, num_points=-1):\n        self.root_dir = root_path\n        self.split = split\n        self.split_dir = osp.join(root_path, 'sunrgbd_trainval')\n        self.num_points = num_points\n        self.classes = [\n            'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n            'night_stand', 'bookshelf', 'bathtub'\n        ]\n        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}\n        self.label2cat = {\n            label: self.classes[label]\n            for label in range(len(self.classes))\n        }\n        assert split in ['train', 'val', 'test']\n        split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')\n        mmcv.check_file_exist(split_file)\n        self.sample_id_list = map(int, mmcv.list_from_file(split_file))\n        self.image_dir = osp.join(self.split_dir, 'image')\n        self.calib_dir = osp.join(self.split_dir, 'calib')\n        self.depth_dir = osp.join(self.split_dir, 'depth')\n        if use_v1:\n            self.label_dir = osp.join(self.split_dir, 'label_v1')\n        else:\n            self.label_dir = osp.join(self.split_dir, 'label')\n\n    def __len__(self):\n        return len(self.sample_id_list)\n\n    def get_image(self, idx):\n        img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')\n        return mmcv.imread(img_filename)\n\n    def get_image_shape(self, idx):\n        image = self.get_image(idx)\n        return np.array(image.shape[:2], dtype=np.int32)\n\n    def get_depth(self, idx):\n        depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat')\n        depth = sio.loadmat(depth_filename)['instance']\n        return depth\n\n    def get_calibration(self, idx):\n        calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')\n        lines = [line.rstrip() for line in open(calib_filepath)]\n        Rt = np.array([float(x) for x in lines[0].split(' ')])\n        Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32)\n        K = np.array([float(x) for x in lines[1].split(' ')])\n        K = np.reshape(K, (3, 3), order='F').astype(np.float32)\n        return K, Rt\n\n    def get_label_objects(self, idx):\n        label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')\n        lines = [line.rstrip() for line in open(label_filename)]\n        objects = [SUNRGBDInstance(line) for line in lines]\n        return objects\n\n    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):\n        \"\"\"Get data infos.\n\n        This method gets information from the raw data.\n\n        Args:\n            num_workers (int, optional): Number of threads to be used.\n                Default: 4.\n            has_label (bool, optional): Whether the data has label.\n                Default: True.\n            sample_id_list (list[int], optional): Index list of the sample.\n                Default: None.\n\n        Returns:\n            infos (list[dict]): Information of the raw data.\n        \"\"\"\n\n        def process_single_scene(sample_idx):\n            print(f'{self.split} sample_idx: {sample_idx}')\n            # convert depth to points\n            pc_upright_depth = self.get_depth(sample_idx)\n            pc_upright_depth_subsampled = random_sampling(\n                pc_upright_depth, self.num_points)\n\n            info = dict()\n            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}\n            info['point_cloud'] = pc_info\n\n            
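# write the sampled depth point cloud to a .bin file under the points/ folder\n            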
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))\n            pc_upright_depth_subsampled.tofile(\n                osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin'))\n\n            info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')\n            img_path = osp.join('image', f'{sample_idx:06d}.jpg')\n            image_info = {\n                'image_idx': sample_idx,\n                'image_shape': self.get_image_shape(sample_idx),\n                'image_path': img_path\n            }\n            info['image'] = image_info\n\n            K, Rt = self.get_calibration(sample_idx)\n            calib_info = {'K': K, 'Rt': Rt}\n            info['calib'] = calib_info\n\n            if has_label:\n                obj_list = self.get_label_objects(sample_idx)\n                annotations = {}\n                annotations['gt_num'] = len([\n                    obj.classname for obj in obj_list\n                    if obj.classname in self.cat2label.keys()\n                ])\n                if annotations['gt_num'] != 0:\n                    annotations['name'] = np.array([\n                        obj.classname for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])\n                    annotations['bbox'] = np.concatenate([\n                        obj.box2d.reshape(1, 4) for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ],\n                                                         axis=0)\n                    annotations['location'] = np.concatenate([\n                        obj.centroid.reshape(1, 3) for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ],\n                                                             axis=0)\n                    annotations['dimensions'] = 2 * np.array([\n                        [obj.length, obj.width, obj.height] for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])  # lwh (depth) format\n                    annotations['rotation_y'] = np.array([\n                        obj.heading_angle for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])\n                    annotations['index'] = np.arange(\n                        len(obj_list), dtype=np.int32)\n                    annotations['class'] = np.array([\n                        self.cat2label[obj.classname] for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])\n                    annotations['gt_boxes_upright_depth'] = np.stack(\n                        [\n                            obj.box3d for obj in obj_list\n                            if obj.classname in self.cat2label.keys()\n                        ],\n                        axis=0)  # (K,8)\n                info['annos'] = annotations\n            return info\n\n        sample_id_list = sample_id_list if \\\n            sample_id_list is not None else self.sample_id_list\n        with futures.ThreadPoolExecutor(num_workers) as executor:\n            infos = executor.map(process_single_scene, sample_id_list)\n        return list(infos)\n"
  },
  {
    "path": "tools/data_converter/waymo_converter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nr\"\"\"Adapted from `Waymo to KITTI converter\n    <https://github.com/caizhongang/waymo_kitti_converter>`_.\n\"\"\"\n\ntry:\n    from waymo_open_dataset import dataset_pb2\nexcept ImportError:\n    raise ImportError(\n        'Please run \"pip install waymo-open-dataset-tf-2-1-0==1.2.0\" '\n        'to install the official devkit first.')\n\nfrom glob import glob\nfrom os.path import join\n\nimport mmcv\nimport numpy as np\nimport tensorflow as tf\nfrom waymo_open_dataset.utils import range_image_utils, transform_utils\nfrom waymo_open_dataset.utils.frame_utils import \\\n    parse_range_image_and_camera_projection\n\n\nclass Waymo2KITTI(object):\n    \"\"\"Waymo to KITTI converter.\n\n    This class serves as the converter to change the waymo raw data to KITTI\n    format.\n\n    Args:\n        load_dir (str): Directory to load waymo raw data.\n        save_dir (str): Directory to save data in KITTI format.\n        prefix (str): Prefix of filename. In general, 0 for training, 1 for\n            validation and 2 for testing.\n        workers (int, optional): Number of workers for the parallel process.\n        test_mode (bool, optional): Whether in the test_mode. Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 load_dir,\n                 save_dir,\n                 prefix,\n                 workers=64,\n                 test_mode=False):\n        self.filter_empty_3dboxes = True\n        self.filter_no_label_zone_points = True\n\n        self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST']\n\n        # Only data collected in specific locations will be converted\n        # If set None, this filter is disabled\n        # Available options: location_sf (main dataset)\n        self.selected_waymo_locations = None\n        self.save_track_id = False\n\n        # turn on eager execution for older tensorflow versions\n        if int(tf.__version__.split('.')[0]) < 2:\n            tf.enable_eager_execution()\n\n        self.lidar_list = [\n            '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',\n            '_SIDE_LEFT'\n        ]\n        self.type_list = [\n            'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST'\n        ]\n        self.waymo_to_kitti_class_map = {\n            'UNKNOWN': 'DontCare',\n            'PEDESTRIAN': 'Pedestrian',\n            'VEHICLE': 'Car',\n            'CYCLIST': 'Cyclist',\n            'SIGN': 'Sign'  # not in kitti\n        }\n\n        self.load_dir = load_dir\n        self.save_dir = save_dir\n        self.prefix = prefix\n        self.workers = int(workers)\n        self.test_mode = test_mode\n\n        self.tfrecord_pathnames = sorted(\n            glob(join(self.load_dir, '*.tfrecord')))\n\n        self.label_save_dir = f'{self.save_dir}/label_'\n        self.label_all_save_dir = f'{self.save_dir}/label_all'\n        self.image_save_dir = f'{self.save_dir}/image_'\n        self.calib_save_dir = f'{self.save_dir}/calib'\n        self.point_cloud_save_dir = f'{self.save_dir}/velodyne'\n        self.pose_save_dir = f'{self.save_dir}/pose'\n        self.timestamp_save_dir = f'{self.save_dir}/timestamp'\n\n        self.create_folder()\n\n    def convert(self):\n        \"\"\"Convert action.\"\"\"\n        print('Start converting ...')\n        mmcv.track_parallel_progress(self.convert_one, range(len(self)),\n                                     self.workers)\n        print('\\nFinished ...')\n\n    def convert_one(self, file_idx):\n      
  \"\"\"Convert action for single file.\n\n        Args:\n            file_idx (int): Index of the file to be converted.\n        \"\"\"\n        pathname = self.tfrecord_pathnames[file_idx]\n        dataset = tf.data.TFRecordDataset(pathname, compression_type='')\n\n        for frame_idx, data in enumerate(dataset):\n\n            frame = dataset_pb2.Frame()\n            frame.ParseFromString(bytearray(data.numpy()))\n            if (self.selected_waymo_locations is not None\n                    and frame.context.stats.location\n                    not in self.selected_waymo_locations):\n                continue\n\n            self.save_image(frame, file_idx, frame_idx)\n            self.save_calib(frame, file_idx, frame_idx)\n            self.save_lidar(frame, file_idx, frame_idx)\n            self.save_pose(frame, file_idx, frame_idx)\n            self.save_timestamp(frame, file_idx, frame_idx)\n\n            if not self.test_mode:\n                self.save_label(frame, file_idx, frame_idx)\n\n    def __len__(self):\n        \"\"\"Length of the filename list.\"\"\"\n        return len(self.tfrecord_pathnames)\n\n    def save_image(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the images in png format.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        for img in frame.images:\n            img_path = f'{self.image_save_dir}{str(img.name - 1)}/' + \\\n                f'{self.prefix}{str(file_idx).zfill(3)}' + \\\n                f'{str(frame_idx).zfill(3)}.png'\n            img = mmcv.imfrombytes(img.image)\n            mmcv.imwrite(img, img_path)\n\n    def save_calib(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the calibration data.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        # waymo front camera to kitti reference camera\n        T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],\n                                       [1.0, 0.0, 0.0]])\n        camera_calibs = []\n        R0_rect = [f'{i:e}' for i in np.eye(3).flatten()]\n        Tr_velo_to_cams = []\n        calib_context = ''\n\n        for camera in frame.context.camera_calibrations:\n            # extrinsic parameters\n            T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(\n                4, 4)\n            T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)\n            Tr_velo_to_cam = \\\n                self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam\n            if camera.name == 1:  # FRONT = 1, see dataset.proto for details\n                self.T_velo_to_front_cam = Tr_velo_to_cam.copy()\n            Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, ))\n            Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam])\n\n            # intrinsic parameters\n            camera_calib = np.zeros((3, 4))\n            camera_calib[0, 0] = camera.intrinsic[0]\n            camera_calib[1, 1] = camera.intrinsic[1]\n            camera_calib[0, 2] = camera.intrinsic[2]\n            camera_calib[1, 2] = camera.intrinsic[3]\n            camera_calib[2, 2] = 1\n            camera_calib = list(camera_calib.reshape(12))\n            camera_calib = [f'{i:e}' for i in camera_calib]\n            camera_calibs.append(camera_calib)\n\n        # 
all camera ids are saved as id-1 in the result because\n        # camera 0 is unknown in the proto\n        for i in range(5):\n            calib_context += 'P' + str(i) + ': ' + \\\n                ' '.join(camera_calibs[i]) + '\\n'\n        calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\\n'\n        for i in range(5):\n            calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \\\n                ' '.join(Tr_velo_to_cams[i]) + '\\n'\n\n        with open(\n                f'{self.calib_save_dir}/{self.prefix}' +\n                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt',\n                'w+') as fp_calib:\n            fp_calib.write(calib_context)\n            fp_calib.close()\n\n    def save_lidar(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the lidar data in psd format.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        range_images, camera_projections, range_image_top_pose = \\\n            parse_range_image_and_camera_projection(frame)\n\n        # First return\n        points_0, cp_points_0, intensity_0, elongation_0, mask_indices_0 = \\\n            self.convert_range_image_to_point_cloud(\n                frame,\n                range_images,\n                camera_projections,\n                range_image_top_pose,\n                ri_index=0\n            )\n        points_0 = np.concatenate(points_0, axis=0)\n        intensity_0 = np.concatenate(intensity_0, axis=0)\n        elongation_0 = np.concatenate(elongation_0, axis=0)\n        mask_indices_0 = np.concatenate(mask_indices_0, axis=0)\n\n        # Second return\n        points_1, cp_points_1, intensity_1, elongation_1, mask_indices_1 = \\\n            self.convert_range_image_to_point_cloud(\n                frame,\n                range_images,\n                camera_projections,\n                range_image_top_pose,\n                ri_index=1\n            )\n        points_1 = np.concatenate(points_1, axis=0)\n        intensity_1 = np.concatenate(intensity_1, axis=0)\n        elongation_1 = np.concatenate(elongation_1, axis=0)\n        mask_indices_1 = np.concatenate(mask_indices_1, axis=0)\n\n        points = np.concatenate([points_0, points_1], axis=0)\n        intensity = np.concatenate([intensity_0, intensity_1], axis=0)\n        elongation = np.concatenate([elongation_0, elongation_1], axis=0)\n        mask_indices = np.concatenate([mask_indices_0, mask_indices_1], axis=0)\n\n        # timestamp = frame.timestamp_micros * np.ones_like(intensity)\n\n        # concatenate x,y,z, intensity, elongation, timestamp (6-dim)\n        point_cloud = np.column_stack(\n            (points, intensity, elongation, mask_indices))\n\n        pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \\\n            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'\n        point_cloud.astype(np.float32).tofile(pc_path)\n\n    def save_label(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the label data in txt format.\n        The relation between waymo and kitti coordinates is noteworthy:\n        1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)\n        2. x-y-z: front-left-up (waymo) -> right-down-front(kitti)\n        3. bbox origin at volumetric center (waymo) -> bottom center (kitti)\n        4. 
rotation: +x around y-axis (kitti) -> +x around z-axis (waymo)\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        fp_label_all = open(\n            f'{self.label_all_save_dir}/{self.prefix}' +\n            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')\n        id_to_bbox = dict()\n        id_to_name = dict()\n        for labels in frame.projected_lidar_labels:\n            name = labels.name\n            for label in labels.labels:\n                # TODO: need a workaround as bbox may not belong to front cam\n                bbox = [\n                    label.box.center_x - label.box.length / 2,\n                    label.box.center_y - label.box.width / 2,\n                    label.box.center_x + label.box.length / 2,\n                    label.box.center_y + label.box.width / 2\n                ]\n                id_to_bbox[label.id] = bbox\n                id_to_name[label.id] = name - 1\n\n        for obj in frame.laser_labels:\n            bounding_box = None\n            name = None\n            id = obj.id\n            for lidar in self.lidar_list:\n                if id + lidar in id_to_bbox:\n                    bounding_box = id_to_bbox.get(id + lidar)\n                    name = str(id_to_name.get(id + lidar))\n                    break\n\n            if bounding_box is None or name is None:\n                name = '0'\n                bounding_box = (0, 0, 0, 0)\n\n            my_type = self.type_list[obj.type]\n\n            if my_type not in self.selected_waymo_classes:\n                continue\n\n            if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1:\n                continue\n\n            my_type = self.waymo_to_kitti_class_map[my_type]\n\n            height = obj.box.height\n            width = obj.box.width\n            length = obj.box.length\n\n            x = obj.box.center_x\n            y = obj.box.center_y\n            z = obj.box.center_z - height / 2\n\n            # project bounding box to the virtual reference frame\n            pt_ref = self.T_velo_to_front_cam @ \\\n                np.array([x, y, z, 1]).reshape((4, 1))\n            x, y, z, _ = pt_ref.flatten().tolist()\n\n            rotation_y = -obj.box.heading - np.pi / 2\n            track_id = obj.id\n\n            # not available\n            truncated = 0\n            occluded = 0\n            alpha = -10\n\n            line = my_type + \\\n                ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\\n'.format(\n                    round(truncated, 2), occluded, round(alpha, 2),\n                    round(bounding_box[0], 2), round(bounding_box[1], 2),\n                    round(bounding_box[2], 2), round(bounding_box[3], 2),\n                    round(height, 2), round(width, 2), round(length, 2),\n                    round(x, 2), round(y, 2), round(z, 2),\n                    round(rotation_y, 2))\n\n            if self.save_track_id:\n                line_all = line[:-1] + ' ' + name + ' ' + track_id + '\\n'\n            else:\n                line_all = line[:-1] + ' ' + name + '\\n'\n\n            fp_label = open(\n                f'{self.label_save_dir}{name}/{self.prefix}' +\n                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')\n            fp_label.write(line)\n            fp_label.close()\n\n            fp_label_all.write(line_all)\n\n        fp_label_all.close()\n\n    
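# the pose and timestamp files written below are extras beyond the standard KITTI layout, kept for methods that exploit temporal information\n    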
def save_pose(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the pose data.\n\n        Note that SDC's own pose is not included in the regular training\n        of KITTI dataset. KITTI raw dataset contains ego motion files\n        but are not often used. Pose is important for algorithms that\n        take advantage of the temporal information.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        pose = np.array(frame.pose.transform).reshape(4, 4)\n        np.savetxt(\n            join(f'{self.pose_save_dir}/{self.prefix}' +\n                 f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),\n            pose)\n\n    def save_timestamp(self, frame, file_idx, frame_idx):\n        \"\"\"Save the timestamp data in a separate file instead of the\n        pointcloud.\n\n        Note that SDC's own pose is not included in the regular training\n        of KITTI dataset. KITTI raw dataset contains ego motion files\n        but are not often used. Pose is important for algorithms that\n        take advantage of the temporal information.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        with open(\n                join(f'{self.timestamp_save_dir}/{self.prefix}' +\n                     f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),\n                'w') as f:\n            f.write(str(frame.timestamp_micros))\n\n    def create_folder(self):\n        \"\"\"Create folder for data preprocessing.\"\"\"\n        if not self.test_mode:\n            dir_list1 = [\n                self.label_all_save_dir, self.calib_save_dir,\n                self.point_cloud_save_dir, self.pose_save_dir,\n                self.timestamp_save_dir\n            ]\n            dir_list2 = [self.label_save_dir, self.image_save_dir]\n        else:\n            dir_list1 = [\n                self.calib_save_dir, self.point_cloud_save_dir,\n                self.pose_save_dir, self.timestamp_save_dir\n            ]\n            dir_list2 = [self.image_save_dir]\n        for d in dir_list1:\n            mmcv.mkdir_or_exist(d)\n        for d in dir_list2:\n            for i in range(5):\n                mmcv.mkdir_or_exist(f'{d}{str(i)}')\n\n    def convert_range_image_to_point_cloud(self,\n                                           frame,\n                                           range_images,\n                                           camera_projections,\n                                           range_image_top_pose,\n                                           ri_index=0):\n        \"\"\"Convert range images to point cloud.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame.\n            range_images (dict): Mapping from laser_name to list of two\n                range images corresponding with two returns.\n            camera_projections (dict): Mapping from laser_name to list of two\n                camera projections corresponding with two returns.\n            range_image_top_pose (:obj:`Transform`): Range image pixel pose for\n                top lidar.\n            ri_index (int, optional): 0 for the first return,\n                1 for the second return. 
Default: 0.\n\n        Returns:\n            tuple[list[np.ndarray]]: (List of points with shape [N, 3],\n                camera projections of points with shape [N, 6], intensity\n                with shape [N, 1], elongation with shape [N, 1], points'\n                position in the depth map (element offset if points come from\n                the main lidar otherwise -1) with shape[N, 1]). All the\n                lists have the length of lidar numbers (5).\n        \"\"\"\n        calibrations = sorted(\n            frame.context.laser_calibrations, key=lambda c: c.name)\n        points = []\n        cp_points = []\n        intensity = []\n        elongation = []\n        mask_indices = []\n\n        frame_pose = tf.convert_to_tensor(\n            value=np.reshape(np.array(frame.pose.transform), [4, 4]))\n        # [H, W, 6]\n        range_image_top_pose_tensor = tf.reshape(\n            tf.convert_to_tensor(value=range_image_top_pose.data),\n            range_image_top_pose.shape.dims)\n        # [H, W, 3, 3]\n        range_image_top_pose_tensor_rotation = \\\n            transform_utils.get_rotation_matrix(\n                range_image_top_pose_tensor[..., 0],\n                range_image_top_pose_tensor[..., 1],\n                range_image_top_pose_tensor[..., 2])\n        range_image_top_pose_tensor_translation = \\\n            range_image_top_pose_tensor[..., 3:]\n        range_image_top_pose_tensor = transform_utils.get_transform(\n            range_image_top_pose_tensor_rotation,\n            range_image_top_pose_tensor_translation)\n        for c in calibrations:\n            range_image = range_images[c.name][ri_index]\n            if len(c.beam_inclinations) == 0:\n                beam_inclinations = range_image_utils.compute_inclination(\n                    tf.constant(\n                        [c.beam_inclination_min, c.beam_inclination_max]),\n                    height=range_image.shape.dims[0])\n            else:\n                beam_inclinations = tf.constant(c.beam_inclinations)\n\n            beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])\n            extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])\n\n            range_image_tensor = tf.reshape(\n                tf.convert_to_tensor(value=range_image.data),\n                range_image.shape.dims)\n            pixel_pose_local = None\n            frame_pose_local = None\n            if c.name == dataset_pb2.LaserName.TOP:\n                pixel_pose_local = range_image_top_pose_tensor\n                pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)\n                frame_pose_local = tf.expand_dims(frame_pose, axis=0)\n            range_image_mask = range_image_tensor[..., 0] > 0\n\n            if self.filter_no_label_zone_points:\n                nlz_mask = range_image_tensor[..., 3] != 1.0  # 1.0: in NLZ\n                range_image_mask = range_image_mask & nlz_mask\n\n            range_image_cartesian = \\\n                range_image_utils.extract_point_cloud_from_range_image(\n                    tf.expand_dims(range_image_tensor[..., 0], axis=0),\n                    tf.expand_dims(extrinsic, axis=0),\n                    tf.expand_dims(tf.convert_to_tensor(\n                        value=beam_inclinations), axis=0),\n                    pixel_pose=pixel_pose_local,\n                    frame_pose=frame_pose_local)\n\n            mask_index = tf.where(range_image_mask)\n\n            range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)\n            
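# gather only the pixels kept by range_image_mask (valid range and, if enabled, outside no-label zones)\n            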
points_tensor = tf.gather_nd(range_image_cartesian, mask_index)\n\n            cp = camera_projections[c.name][ri_index]\n            cp_tensor = tf.reshape(\n                tf.convert_to_tensor(value=cp.data), cp.shape.dims)\n            cp_points_tensor = tf.gather_nd(cp_tensor, mask_index)\n            points.append(points_tensor.numpy())\n            cp_points.append(cp_points_tensor.numpy())\n\n            intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],\n                                            mask_index)\n            intensity.append(intensity_tensor.numpy())\n\n            elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],\n                                             mask_index)\n            elongation.append(elongation_tensor.numpy())\n            if c.name == 1:\n                mask_index = (ri_index * range_image_mask.shape[0] +\n                              mask_index[:, 0]\n                              ) * range_image_mask.shape[1] + mask_index[:, 1]\n                mask_index = mask_index.numpy().astype(elongation[-1].dtype)\n            else:\n                mask_index = np.full_like(elongation[-1], -1)\n\n            mask_indices.append(mask_index)\n\n        return points, cp_points, intensity, elongation, mask_indices\n\n    def cart_to_homo(self, mat):\n        \"\"\"Convert transformation matrix in Cartesian coordinates to\n        homogeneous format.\n\n        Args:\n            mat (np.ndarray): Transformation matrix in Cartesian.\n                The input matrix shape is 3x3 or 3x4.\n\n        Returns:\n            np.ndarray: Transformation matrix in homogeneous format.\n                The matrix shape is 4x4.\n        \"\"\"\n        ret = np.eye(4)\n        if mat.shape == (3, 3):\n            ret[:3, :3] = mat\n        elif mat.shape == (3, 4):\n            ret[:3, :] = mat\n        else:\n            raise ValueError(mat.shape)\n        return ret\n"
  },
  {
    "path": "tools/deployment/mmdet3d2torchserve.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom argparse import ArgumentParser, Namespace\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nimport mmcv\n\ntry:\n    from model_archiver.model_packaging import package_model\n    from model_archiver.model_packaging_utils import ModelExportUtils\nexcept ImportError:\n    package_model = None\n\n\ndef mmdet3d2torchserve(\n    config_file: str,\n    checkpoint_file: str,\n    output_folder: str,\n    model_name: str,\n    model_version: str = '1.0',\n    force: bool = False,\n):\n    \"\"\"Converts MMDetection3D model (config + checkpoint) to TorchServe `.mar`.\n\n    Args:\n        config_file (str):\n            In MMDetection3D config format.\n            The contents vary for each task repository.\n        checkpoint_file (str):\n            In MMDetection3D checkpoint format.\n            The contents vary for each task repository.\n        output_folder (str):\n            Folder where `{model_name}.mar` will be created.\n            The file created will be in TorchServe archive format.\n        model_name (str):\n            If not None, used for naming the `{model_name}.mar` file\n            that will be created under `output_folder`.\n            If None, `{Path(checkpoint_file).stem}` will be used.\n        model_version (str, optional):\n            Model's version. Default: '1.0'.\n        force (bool, optional):\n            If True, if there is an existing `{model_name}.mar`\n            file under `output_folder` it will be overwritten.\n            Default: False.\n    \"\"\"\n    mmcv.mkdir_or_exist(output_folder)\n\n    config = mmcv.Config.fromfile(config_file)\n\n    with TemporaryDirectory() as tmpdir:\n        config.dump(f'{tmpdir}/config.py')\n\n        args = Namespace(\n            **{\n                'model_file': f'{tmpdir}/config.py',\n                'serialized_file': checkpoint_file,\n                'handler': f'{Path(__file__).parent}/mmdet3d_handler.py',\n                'model_name': model_name or Path(checkpoint_file).stem,\n                'version': model_version,\n                'export_path': output_folder,\n                'force': force,\n                'requirements_file': None,\n                'extra_files': None,\n                'runtime': 'python',\n                'archive_format': 'default'\n            })\n        manifest = ModelExportUtils.generate_manifest_json(args)\n        package_model(args, manifest)\n\n\ndef parse_args():\n    parser = ArgumentParser(\n        description='Convert MMDetection models to TorchServe `.mar` format.')\n    parser.add_argument('config', type=str, help='config file path')\n    parser.add_argument('checkpoint', type=str, help='checkpoint file path')\n    parser.add_argument(\n        '--output-folder',\n        type=str,\n        required=True,\n        help='Folder where `{model_name}.mar` will be created.')\n    parser.add_argument(\n        '--model-name',\n        type=str,\n        default=None,\n        help='If not None, used for naming the `{model_name}.mar`'\n        'file that will be created under `output_folder`.'\n        'If None, `{Path(checkpoint_file).stem}` will be used.')\n    parser.add_argument(\n        '--model-version',\n        type=str,\n        default='1.0',\n        help='Number used for versioning.')\n    parser.add_argument(\n        '-f',\n        '--force',\n        action='store_true',\n        help='overwrite the existing `{model_name}.mar`')\n    args = parser.parse_args()\n\n   
 return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if package_model is None:\n        raise ImportError('`torch-model-archiver` is required. '\n                          'Try: pip install torch-model-archiver')\n\n    mmdet3d2torchserve(args.config, args.checkpoint, args.output_folder,\n                       args.model_name, args.model_version, args.force)\n"
  },
  {
    "path": "tools/deployment/mmdet3d_handler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport base64\nimport os\n\nimport numpy as np\nimport torch\nfrom ts.torch_handler.base_handler import BaseHandler\n\nfrom mmdet3d.apis import inference_detector, init_model\nfrom mmdet3d.core.points import get_points_type\n\n\nclass MMdet3dHandler(BaseHandler):\n    \"\"\"MMDetection3D Handler used in TorchServe.\n\n    Handler to load models in MMDetection3D, and it will process data to get\n    predicted results. For now, it only supports SECOND.\n    \"\"\"\n    threshold = 0.5\n    load_dim = 4\n    use_dim = [0, 1, 2, 3]\n    coord_type = 'LIDAR'\n    attribute_dims = None\n\n    def initialize(self, context):\n        \"\"\"Initialize function loads the model in MMDetection3D.\n\n        Args:\n            context (context): It is a JSON Object containing information\n                pertaining to the model artifacts parameters.\n        \"\"\"\n        properties = context.system_properties\n        self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'\n        self.device = torch.device(self.map_location + ':' +\n                                   str(properties.get('gpu_id')) if torch.cuda.\n                                   is_available() else self.map_location)\n        self.manifest = context.manifest\n\n        model_dir = properties.get('model_dir')\n        serialized_file = self.manifest['model']['serializedFile']\n        checkpoint = os.path.join(model_dir, serialized_file)\n        self.config_file = os.path.join(model_dir, 'config.py')\n        self.model = init_model(self.config_file, checkpoint, self.device)\n        self.initialized = True\n\n    def preprocess(self, data):\n        \"\"\"Preprocess function converts data into LiDARPoints class.\n\n        Args:\n            data (List): Input data from the request.\n\n        Returns:\n            `LiDARPoints` : The preprocess function returns the input\n                point cloud data as LiDARPoints class.\n        \"\"\"\n        for row in data:\n            # Compat layer: normally the envelope should just return the data\n            # directly, but older versions of Torchserve didn't have envelope.\n            pts = row.get('data') or row.get('body')\n            if isinstance(pts, str):\n                pts = base64.b64decode(pts)\n\n            points = np.frombuffer(pts, dtype=np.float32)\n            points = points.reshape(-1, self.load_dim)\n            points = points[:, self.use_dim]\n            points_class = get_points_type(self.coord_type)\n            points = points_class(\n                points,\n                points_dim=points.shape[-1],\n                attribute_dims=self.attribute_dims)\n\n        return points\n\n    def inference(self, data):\n        \"\"\"Inference Function.\n\n        This function is used to make a prediction call on the\n        given input request.\n\n        Args:\n            data (`LiDARPoints`): LiDARPoints class passed to make\n                the inference request.\n\n        Returns:\n            List(dict) : The predicted result is returned in this function.\n        \"\"\"\n        results, _ = inference_detector(self.model, data)\n        return results\n\n    def postprocess(self, data):\n        \"\"\"Postprocess function.\n\n        This function makes use of the output from the inference and\n        converts it into a torchserve supported response output.\n\n        Args:\n            data (List[dict]): The data received from the prediction\n                output of 
the model.\n\n        Returns:\n            List: The post process function returns a list of the predicted\n                output.\n        \"\"\"\n        output = []\n        for pts_index, result in enumerate(data):\n            output.append([])\n            if 'pts_bbox' in result.keys():\n                pred_bboxes = result['pts_bbox']['boxes_3d'].tensor.numpy()\n                pred_scores = result['pts_bbox']['scores_3d'].numpy()\n            else:\n                pred_bboxes = result['boxes_3d'].tensor.numpy()\n                pred_scores = result['scores_3d'].numpy()\n\n            index = pred_scores > self.threshold\n            bbox_coords = pred_bboxes[index].tolist()\n            score = pred_scores[index].tolist()\n\n            output[pts_index].append({'3dbbox': bbox_coords, 'score': score})\n\n        return output\n"
  },
  {
    "path": "tools/deployment/test_torchserver.py",
    "content": "from argparse import ArgumentParser\n\nimport numpy as np\nimport requests\n\nfrom mmdet3d.apis import inference_detector, init_model\n\n\ndef parse_args():\n    parser = ArgumentParser()\n    parser.add_argument('pcd', help='Point cloud file')\n    parser.add_argument('config', help='Config file')\n    parser.add_argument('checkpoint', help='Checkpoint file')\n    parser.add_argument('model_name', help='The model name in the server')\n    parser.add_argument(\n        '--inference-addr',\n        default='127.0.0.1:8080',\n        help='Address and port of the inference server')\n    parser.add_argument(\n        '--device', default='cuda:0', help='Device used for inference')\n    parser.add_argument(\n        '--score-thr', type=float, default=0.5, help='3d bbox score threshold')\n    args = parser.parse_args()\n    return args\n\n\ndef parse_result(input):\n    bbox = input[0]['3dbbox']\n    result = np.array(bbox)\n    return result\n\n\ndef main(args):\n    # build the model from a config file and a checkpoint file\n    model = init_model(args.config, args.checkpoint, device=args.device)\n    # test a single point cloud file\n    model_result, _ = inference_detector(model, args.pcd)\n    # filter the 3d bboxes whose scores > 0.5\n    if 'pts_bbox' in model_result[0].keys():\n        pred_bboxes = model_result[0]['pts_bbox']['boxes_3d'].tensor.numpy()\n        pred_scores = model_result[0]['pts_bbox']['scores_3d'].numpy()\n    else:\n        pred_bboxes = model_result[0]['boxes_3d'].tensor.numpy()\n        pred_scores = model_result[0]['scores_3d'].numpy()\n    model_result = pred_bboxes[pred_scores > 0.5]\n\n    url = 'http://' + args.inference_addr + '/predictions/' + args.model_name\n    with open(args.pcd, 'rb') as points:\n        response = requests.post(url, points)\n    server_result = parse_result(response.json())\n    assert np.allclose(model_result, server_result)\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "tools/dist_test.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\nNNODES=${NNODES:-1}\nNODE_RANK=${NODE_RANK:-0}\nPORT=${PORT:-29500}\nMASTER_ADDR=${MASTER_ADDR:-\"127.0.0.1\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch \\\n    --nnodes=$NNODES \\\n    --node_rank=$NODE_RANK \\\n    --master_addr=$MASTER_ADDR \\\n    --nproc_per_node=$GPUS \\\n    --master_port=$PORT \\\n    $(dirname \"$0\")/test.py \\\n    $CONFIG \\\n    $CHECKPOINT \\\n    --launcher pytorch \\\n    ${@:4}\n"
  },
  {
    "path": "tools/dist_train.sh",
    "content": "#!/usr/bin/env bash\nCONFIG=$1\nGPUS=$2\nNNODES=${NNODES:-1}\nNODE_RANK=${NODE_RANK:-0}\nPORT=${PORT:-29500}\nMASTER_ADDR=${MASTER_ADDR:-\"127.0.0.1\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch \\\n    --nnodes=$NNODES \\\n    --node_rank=$NODE_RANK \\\n    --master_addr=$MASTER_ADDR \\\n    --nproc_per_node=$GPUS \\\n    --master_port=$PORT \\\n    $(dirname \"$0\")/train.py \\\n    $CONFIG \\\n    --seed 0 \\\n    --launcher pytorch ${@:3}\n"
  },
  {
    "path": "tools/eval.py",
    "content": "import numpy as np\nimport mmcv\n# data = mmcv.load('/mount/data/jiahan/fbbev/test/Sun_Oct_15_11_35/results_nusc_planning.json')\ndata = mmcv.load('/mount/data/FBBEV/test/planner_r50_8x4_12ep_102x102_4f_S111_fix2_/Tue_Oct_24_03_58/results_nusc_planning.json')\n#sort\nkeys = list(data.keys())\n# print(keys)\nnew_keys = []\nfor key in keys:\n   s =key.split(\"-\")\n   new_keys.append([int(s[1]),int(s[2])])\n\nnew_keys=sorted(new_keys,key=(lambda x:(x[0], x[1])))\nsorted_keys = []\nfor key in new_keys:\n   v = ['scene',  str(key[0]).zfill(4), str(key[1]) ]\n   k='-'.join(v)\n   sorted_keys.append(k)\n\nprint(len(data))\n\nall_scene_keys=[]\nkey='-'.join(sorted_keys[0].split(\"-\")[:2])\n# print(key)\nscene=[]\n\nfor k in sorted_keys:\n    if(key in k):\n        # print(True)\n        scene.append(k)\n    else:\n        s =k.split(\"-\")\n        key='-'.join(s[:2])\n        if len(scene)<39:\n            print(scene)\n        all_scene_keys.append(scene)\n        scene=[k]\n\n# print(all_scene_keys)\nlen(all_scene_keys)\n#tranform raw data\nnew_data={}\nfor keys in all_scene_keys:\n    l = len(keys)\n    for i in range(l):\n        val = []\n        index = i\n        for j in range(i+1):\n            if index>6:\n                index-=1\n            else:\n                val.append(data[keys[j]][index])\n                index-=1\n        new_data[keys[i]]=val\n\n#compute mean and var\nstable_dist_with_gt=[]\nstable_mean_distance=[]\nstable_variance_distance=[]\n\nfor key, value in new_data.items():\n    #filter unstable data\n    if(len(value)!=7):\n        continue\n    assert len(value)==7\n    #compute mean\n    gt = value[-1]\n    pred = value[:-1]\n    coor_mean= np.mean(pred, axis=0)\n    #L2\n    dist = np.linalg.norm(coor_mean - gt)\n    stable_dist_with_gt.append(dist)\n\n    #compute var\n    data_array = np.array(pred)\n    \n    distances = np.linalg.norm(data_array - coor_mean, axis=1)\n    mean_distance = np.mean(distances)\n    variance_distance = np.var(distances)\n\n    stable_mean_distance.append(mean_distance)\n    stable_variance_distance.append(variance_distance)\n\nprint('stable_dist_with_gt: {}'.format(np.mean(stable_dist_with_gt)))\nprint('stable_mean_distance: {}'.format(np.mean(stable_mean_distance)))\nprint('stable_variance_distance: {}'.format(np.mean(stable_variance_distance)))\n\nimport random\nimport math\nimport matplotlib.pyplot as plt\n\n# 生成40种不同颜色的列表\ncolors = ['#%02X%02X%02X' % (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255) ) for _ in range(40)]\n\n# colors = ['g', 'b', 'c', 'm', 'y', 'k', 'purple', 'orange', 'pink', 'brown', 'lime', 'teal', 'gold', 'indigo', 'slategray', 'violet', 'darkred', 'maroon', 'orchid']  # 20种不同颜色选项\nmarkers = ['x', 's', 'D', '^', 'v', 'p']  # 不同的标记选项\n\nfor keys in all_scene_keys:\n    all_coor=[]\n    l = len(keys)\n\n    for i in range(l):\n        coordinates=data[keys[i]]\n        all_coor.extend(coordinates)\n    min_x=min(coor[0] for coor in all_coor)\n    min_y=min(coor[1] for coor in all_coor)\n    max_x=max(coor[0] for coor in all_coor)\n    max_y=max(coor[1] for coor in all_coor)  \n    ratio=math.ceil((max_y-min_y)/(max_x-min_x))\n\n    plt.figure(figsize=(8, 8*ratio), dpi=300)\n    plt.gca().invert_yaxis()  # 反转y轴，将原点移至左上角\n\n    gt_traj=[]\n    for i in range(l):\n        coordinates=data[keys[i]]\n        x_coords, y_coords = zip(*coordinates)\n        gt_traj.append(coordinates[0])\n        color = colors[i % len(colors)]\n        plt.scatter(x_coords[0], y_coords[0], 
s=15, marker='o',c='r')\n        # plt.scatter(x_coords[1:], y_coords[1:], s=15, marker='o',c=color)\n        for j in range(len(coordinates) - 1):\n            if i+j > l-2: \n                break\n            plt.plot([x_coords[j], x_coords[j + 1]], [y_coords[j], y_coords[j + 1]], '-',c=color, linewidth=0.5)  \n    \n    x_gt_coords, y_gt_coords = zip(*gt_traj)\n    for i in range(len(gt_traj) - 1):\n        plt.plot([x_gt_coords[i], x_gt_coords[i + 1]], [y_gt_coords[i], y_gt_coords[i + 1]], '-',c='r', linewidth=1)  \n    plt.axis('equal') \n    \n    for i in range(l):\n\n        col_coordinates=new_data[keys[i]]\n        x_coords, y_coords = zip(*col_coordinates)\n        color = colors[i % len(colors)]\n        for j in range(len(col_coordinates)-1):\n            marker = markers[j % len(markers) ]\n            # plt.plot([x_coords[j], x_coords[j + 1]], [y_coords[j], y_coords[j + 1]], '-',c=color, linewidth=0.5)  \n            plt.scatter(x_coords[j], y_coords[j], s=10, marker=marker,c=color)\n\n\n    plt.xlabel('X')\n    plt.ylabel('Y')\n    s =keys[0].split(\"-\")\n    key='-'.join(s[:2])\n    plt.savefig(f'../{key}_111_fix2.png')\n    print(key)\n    plt.close()"
  },
  {
    "path": "tools/misc/browse_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport warnings\nfrom os import path as osp\nfrom pathlib import Path\n\nimport mmcv\nimport numpy as np\nfrom mmcv import Config, DictAction, mkdir_or_exist\n\nfrom mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,\n                               DepthInstance3DBoxes, LiDARInstance3DBoxes)\nfrom mmdet3d.core.visualizer import (show_multi_modality_result, show_result,\n                                     show_seg_result)\nfrom mmdet3d.datasets import build_dataset\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Browse a dataset')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument(\n        '--skip-type',\n        type=str,\n        nargs='+',\n        default=['Normalize'],\n        help='skip some useless pipeline')\n    parser.add_argument(\n        '--output-dir',\n        default=None,\n        type=str,\n        help='If there is no display interface, you can save it')\n    parser.add_argument(\n        '--task',\n        type=str,\n        choices=['det', 'seg', 'multi_modality-det', 'mono-det'],\n        help='Determine the visualization method depending on the task.')\n    parser.add_argument(\n        '--aug',\n        action='store_true',\n        help='Whether to visualize augmented datasets or original dataset.')\n    parser.add_argument(\n        '--online',\n        action='store_true',\n        help='Whether to perform online visualization. Note that you often '\n        'need a monitor to do so.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n    return args\n\n\ndef build_data_cfg(config_path, skip_type, aug, cfg_options):\n    \"\"\"Build data config for loading visualization data.\"\"\"\n\n    cfg = Config.fromfile(config_path)\n    if cfg_options is not None:\n        cfg.merge_from_dict(cfg_options)\n    # extract inner dataset of `RepeatDataset` as `cfg.data.train`\n    # so we don't need to worry about it later\n    if cfg.data.train['type'] == 'RepeatDataset':\n        cfg.data.train = cfg.data.train.dataset\n    # use only first dataset for `ConcatDataset`\n    if cfg.data.train['type'] == 'ConcatDataset':\n        cfg.data.train = cfg.data.train.datasets[0]\n    train_data_cfg = cfg.data.train\n\n    if aug:\n        show_pipeline = cfg.train_pipeline\n    else:\n        show_pipeline = cfg.eval_pipeline\n        for i in range(len(cfg.train_pipeline)):\n            if cfg.train_pipeline[i]['type'] == 'LoadAnnotations3D':\n                show_pipeline.insert(i, cfg.train_pipeline[i])\n            # Collect points as well as labels\n            if cfg.train_pipeline[i]['type'] == 'Collect3D':\n                if show_pipeline[-1]['type'] == 'Collect3D':\n                    show_pipeline[-1] = cfg.train_pipeline[i]\n                else:\n                    show_pipeline.append(cfg.train_pipeline[i])\n\n    train_data_cfg['pipeline'] = [\n        x for x in show_pipeline if x['type'] not in skip_type\n    ]\n\n    return cfg\n\n\ndef to_depth_mode(points, bboxes):\n    \"\"\"Convert points and bboxes to Depth Coord and Depth Box mode.\"\"\"\n    if points is not None:\n        points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,\n                                           Coord3DMode.DEPTH)\n    if bboxes is not None:\n        bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,\n                                   Box3DMode.DEPTH)\n    return points, bboxes\n\n\ndef show_det_data(input, out_dir, show=False):\n    \"\"\"Visualize 3D point cloud and 3D bboxes.\"\"\"\n    img_metas = input['img_metas']._data\n    points = input['points']._data.numpy()\n    gt_bboxes = input['gt_bboxes_3d']._data.tensor\n    if img_metas['box_mode_3d'] != Box3DMode.DEPTH:\n        points, gt_bboxes = to_depth_mode(points, gt_bboxes)\n    filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]\n    show_result(\n        points,\n        gt_bboxes.clone(),\n        None,\n        out_dir,\n        filename,\n        show=show,\n        snapshot=True)\n\n\ndef show_seg_data(input, out_dir, show=False):\n    \"\"\"Visualize 3D point cloud and segmentation mask.\"\"\"\n    img_metas = input['img_metas']._data\n    points = input['points']._data.numpy()\n    gt_seg = input['pts_semantic_mask']._data.numpy()\n    filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]\n    show_seg_result(\n        points,\n        gt_seg.copy(),\n        None,\n        out_dir,\n        filename,\n        np.array(img_metas['PALETTE']),\n        img_metas['ignore_index'],\n        show=show,\n        snapshot=True)\n\n\ndef show_proj_bbox_img(input, out_dir, show=False, is_nus_mono=False):\n    \"\"\"Visualize 3D bboxes on 2D image by projection.\"\"\"\n    gt_bboxes = input['gt_bboxes_3d']._data\n    img_metas = input['img_metas']._data\n    img = input['img']._data.numpy()\n    # need to transpose channel to first dim\n    img = img.transpose(1, 2, 0)\n  
  # no 3D gt bboxes, just show img\n    if gt_bboxes.tensor.shape[0] == 0:\n        gt_bboxes = None\n    filename = Path(img_metas['filename']).name\n    if isinstance(gt_bboxes, DepthInstance3DBoxes):\n        show_multi_modality_result(\n            img,\n            gt_bboxes,\n            None,\n            None,\n            out_dir,\n            filename,\n            box_mode='depth',\n            img_metas=img_metas,\n            show=show)\n    elif isinstance(gt_bboxes, LiDARInstance3DBoxes):\n        show_multi_modality_result(\n            img,\n            gt_bboxes,\n            None,\n            img_metas['lidar2img'],\n            out_dir,\n            filename,\n            box_mode='lidar',\n            img_metas=img_metas,\n            show=show)\n    elif isinstance(gt_bboxes, CameraInstance3DBoxes):\n        show_multi_modality_result(\n            img,\n            gt_bboxes,\n            None,\n            img_metas['cam2img'],\n            out_dir,\n            filename,\n            box_mode='camera',\n            img_metas=img_metas,\n            show=show)\n    else:\n        # can't project, just show img\n        warnings.warn(\n            f'unrecognized gt box type {type(gt_bboxes)}, only show image')\n        show_multi_modality_result(\n            img, None, None, None, out_dir, filename, show=show)\n\n\ndef main():\n    args = parse_args()\n\n    if args.output_dir is not None:\n        mkdir_or_exist(args.output_dir)\n\n    cfg = build_data_cfg(args.config, args.skip_type, args.aug,\n                         args.cfg_options)\n    try:\n        dataset = build_dataset(\n            cfg.data.train, default_args=dict(filter_empty_gt=False))\n    except TypeError:  # seg dataset doesn't have `filter_empty_gt` key\n        dataset = build_dataset(cfg.data.train)\n\n    dataset_type = cfg.dataset_type\n    # configure visualization mode\n    vis_task = args.task  # 'det', 'seg', 'multi_modality-det', 'mono-det'\n    progress_bar = mmcv.ProgressBar(len(dataset))\n\n    for input in dataset:\n        if vis_task in ['det', 'multi_modality-det']:\n            # show 3D bboxes on 3D point clouds\n            show_det_data(input, args.output_dir, show=args.online)\n        if vis_task in ['multi_modality-det', 'mono-det']:\n            # project 3D bboxes to 2D image\n            show_proj_bbox_img(\n                input,\n                args.output_dir,\n                show=args.online,\n                is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))\n        elif vis_task in ['seg']:\n            # show 3D segmentation mask on 3D point clouds\n            show_seg_data(input, args.output_dir, show=args.online)\n        progress_bar.update()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/misc/download.sh",
    "content": "# Download zip dataset from Google Drive\nfilename='dd3d_det_final.pth'\n# https://drive.google.com/file/d/158ltbC_wjRoe3uBnktbwCgeIByadwxTY/view?usp=share_link\n# https://drive.google.com/file/d/1gQkhWERCzAosBwG5bh2BKkt1k0TJZt-A/view?usp=share_link\nfileid='1gQkhWERCzAosBwG5bh2BKkt1k0TJZt-A'\nwget --load-cookies /tmp.txt \"https://drive.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://drive.google.com/uc?export=download&id=${fileid}' -O- | sed -rn 's/.confirm=([0-9A-Za-z_]+)./\\1\\n/p')&id=${fileid}\" -O ${filename}\n"
  },
  {
    "path": "tools/misc/fuse_conv_bn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\n\nimport torch\nfrom mmcv.runner import save_checkpoint\nfrom torch import nn as nn\n\nfrom mmdet3d.apis import init_model\n\n\ndef fuse_conv_bn(conv, bn):\n    \"\"\"During inference, the functionary of batch norm layers is turned off but\n    only the mean and var alone channels are used, which exposes the chance to\n    fuse it with the preceding conv layers to save computations and simplify\n    network structures.\"\"\"\n    conv_w = conv.weight\n    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(\n        bn.running_mean)\n\n    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)\n    conv.weight = nn.Parameter(conv_w *\n                               factor.reshape([conv.out_channels, 1, 1, 1]))\n    conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)\n    return conv\n\n\ndef fuse_module(m):\n    last_conv = None\n    last_conv_name = None\n\n    for name, child in m.named_children():\n        if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):\n            if last_conv is None:  # only fuse BN that is after Conv\n                continue\n            fused_conv = fuse_conv_bn(last_conv, child)\n            m._modules[last_conv_name] = fused_conv\n            # To reduce changes, set BN as Identity instead of deleting it.\n            m._modules[name] = nn.Identity()\n            last_conv = None\n        elif isinstance(child, nn.Conv2d):\n            last_conv = child\n            last_conv_name = name\n        else:\n            fuse_module(child)\n    return m\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='fuse Conv and BN layers in a model')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument('checkpoint', help='checkpoint file path')\n    parser.add_argument('out', help='output path of the converted model')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    # build the model from a config file and a checkpoint file\n    model = init_model(args.config, args.checkpoint)\n    # fuse conv and bn layers of the model\n    fused_model = fuse_module(model)\n    save_checkpoint(fused_model, args.out)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/misc/print_config.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\n\nfrom mmcv import Config, DictAction\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Print the whole config')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument(\n        '--options', nargs='+', action=DictAction, help='arguments in dict')\n    args = parser.parse_args()\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    if args.options is not None:\n        cfg.merge_from_dict(args.options)\n    print(f'Config:\\n{cfg.pretty_text}')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/misc/tmp.txt",
    "content": "# Netscape HTTP Cookie File\n# http://curl.haxx.se/rfc/cookie_spec.html\n# This file was generated by Cookie-Editor\n.google.com\tTRUE\t/\tTRUE\t1717919270\tSAPISID\tfDOISBW6egQFC6aK/AYuRwhTHDp55K6ccn\n.google.com\tTRUE\t/\tTRUE\t1717919270\t__Secure-3PAPISID\tfDOISBW6egQFC6aK/AYuRwhTHDp55K6ccn\n.google.com\tTRUE\t/\tTRUE\t1693385071\tAEC\tAUEFqZfwniMLFUHoIt6OD4mKbetRVudbGNtaLrck-T7gPkWVxz9BodiWk14\n.google.com\tTRUE\t/drive\tFALSE\t1713617653\t_ga_3WTQFP9ECQ\tGS1.1-2.1679055684.1.1.1679057653.0.0.0\n.google.com\tTRUE\t/drive\tFALSE\t1713617653\t_ga\tGA1.2-2.1901804985.1679055685\n.google.com\tTRUE\t/\tTRUE\t1700296889\tNID\t511=rk4NeUDX9SGFBufJUHAaorRIa-Fl4MPmY6B2hUaho_L1KK2SQD1dc-0w1MGV-Z_BMFkIES9xOzD8JHz-ywW2j-f_l-9wjuMTrTW2p2Ykge9XU01HugAoZSiR3fK4G_D7maxP3AwWIskdRFLMM3yiGO8EjvAN8V_Q89eoSresXbjSpNV0n4RFNrWq4bIaaDasSU4mmEqoREXGeULS15PcX2Hx9aF9C8FIh0A5tqSJ1ClkxhNouUh5pf8jTPdeqKNB9lBy6qbYM9wUy968rwt-pOetIumwWB7x0Q\n.google.com\tTRUE\t/\tFALSE\t1717919270\tAPISID\tpPIDTbOXJnT7lKlc/A4fGqNyULYJeeVvtd\n.google.com\tTRUE\t/\tTRUE\t1716025323\t__Secure-1PSIDTS\tsidts-CjIBLFra0u27E7T2JqfS2rhsCoQmeceLn796H-Ut4GZqXKQg5v5SyYKGSL-OhwScp5CHNxAA\ndrive.google.com\tTRUE\t/drive\tTRUE\t1685353314\tCOMPASS\tdrive=CgAQ8qSdowYaWQAJa4lXUmoMUbVs5gaqovaTKg_TrPkHDxURXheR7ig5ALHoe4GC0baVX590fz95pSKf606cWNPRQvdlqawQDUYeobek7OFDtYNM5LnYCdhOn7Y7vMSvcahE\ndrive.google.com\tTRUE\t/\tTRUE\t1717073695\t__Secure-OSID\tVwgfZ96E2CAZyjiD6u_Nx_5J9Y5hb917w3tcSp-Fvp5kgVIQ2Lv5E8fIgvtXiYExVyDZZg.\n.google.com\tTRUE\t/\tTRUE\t1717919270\t__Secure-1PAPISID\tfDOISBW6egQFC6aK/AYuRwhTHDp55K6ccn\n.google.com\tTRUE\t/\tTRUE\t1717919270\t__Secure-3PSID\tWQgfZxy61xHIQI6nrH63KOv8Nt7LeDdOB65vZO17iNbB0VQvKtzPHNMwpPDZZtXpUlYDdA.\ndrive.google.com\tTRUE\t/\tTRUE\t1684576399\tOTZ\t6994673_24_24__24_\n.google.com\tTRUE\t/\tFALSE\t1687006233\t1P_JAR\t2023-5-18-12\n.google.com\tTRUE\t/\tTRUE\t1717919270\t__Secure-1PSID\tWQgfZxy61xHIQI6nrH63KOv8Nt7LeDdOB65vZO17iNbB0VQvIgq7ABf39EliH1-4MFh0bw.\n.google.com\tTRUE\t/\tTRUE\t1716025497\t__Secure-1PSIDCC\tAP8dLtz1xdzlPTgBciw5RLQBNR2VfYf_cq5yiAdUv6uH6W0QLclkU5DOa5CsffyvO_hJrNJiKmm7\n.google.com\tTRUE\t/\tTRUE\t1716025505\t__Secure-3PSIDCC\tAP8dLtzwXzEPEVW8QkHFSVWbGpToU60VUdNUFYqTaqwa76HMtku_K1Mx5WnEFYRSLYSS-eVTalK6\n.google.com\tTRUE\t/\tTRUE\t1716025323\t__Secure-3PSIDTS\tsidts-CjIBLFra0u27E7T2JqfS2rhsCoQmeceLn796H-Ut4GZqXKQg5v5SyYKGSL-OhwScp5CHNxAA\n.google.com\tTRUE\t/\tTRUE\t1698201757\t__Secure-ENID\t10.SE=RAec2IGzq1Xo3xWELDpWGbcO3L7Vjn-ZuxxpsVg7wVU3TdXnOGr2p6zVusjvIqoHmYPQiK3hWtXIuFqBoJSf_sOEL46i922mvjDWB_eoSQBBK8yCyrkB1jtzlr_nfZjW0ZDtJUTA7UaDHXiDxRTVOyBKrTdY-k9ZTWeLzWp9LxjzsI8L_Aur09UvApRT01Ycsb1H_LGzQcPYThN3NEhfELdBApDvGJT5w9EH0sFgz7RIz42x5QBNJy4zJlWdeNFJwdgkbA-l4h6q1RpGVq4z\n.google.com\tTRUE\t/\tTRUE\t1699694398\tCONSENT\tPENDING+442\n.google.com\tTRUE\t/\tFALSE\t1717919270\tHSID\tA8wA95lYkUAKA7rDu\ndrive.google.com\tTRUE\t/\tTRUE\t1717073695\tOSID\tVwgfZ96E2CAZyjiD6u_Nx_5J9Y5hb917w3tcSp-Fvp5kgVIQZmJpCHob8u2FFlJTXPv3mA.\n.google.com\tTRUE\t/\tTRUE\t1684575936\tS\tbilling-ui-v3=lmhpo1c6WCSMkIRa_Js-b_qLtL8W9dQ6:billing-ui-v3-efe=lmhpo1c6WCSMkIRa_Js-b_qLtL8W9dQ6\n.google.com\tTRUE\t/\tFALSE\t1697607299\tSEARCH_SAMESITE\tCgQIjJgB\n.google.com\tTRUE\t/\tFALSE\t1717919270\tSID\tWQgfZxy61xHIQI6nrH63KOv8Nt7LeDdOB65vZO17iNbB0VQv7-kmUUio1TuXFFn2qkHV6g.\n.google.com\tTRUE\t/\tFALSE\t1716025497\tSIDCC\tAP8dLtzAhi_Ib-NgPFkfzLkT3mT9_Pn7dhRyf5YdQWjsj6mYNL1-3JSY2BSLd6fLFKDJBYoKuhM\n.google.com\tTRUE\t/\tTRUE\t1717919270\tSSID\tAkJid5vjB9OBQaw4W"
  },
  {
    "path": "tools/misc/visualize_results.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\n\nimport mmcv\nfrom mmcv import Config\n\nfrom mmdet3d.datasets import build_dataset\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet3D visualize the results')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('--result', help='results file in pickle format')\n    parser.add_argument(\n        '--show-dir', help='directory where visualize results will be saved')\n    args = parser.parse_args()\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    if args.result is not None and \\\n            not args.result.endswith(('.pkl', '.pickle')):\n        raise ValueError('The results file must be a pkl file.')\n\n    cfg = Config.fromfile(args.config)\n    cfg.data.test.test_mode = True\n\n    # build the dataset\n    dataset = build_dataset(cfg.data.test)\n    results = mmcv.load(args.result)\n\n    if getattr(dataset, 'show', None) is not None:\n        # data loading pipeline for showing\n        eval_pipeline = cfg.get('eval_pipeline', {})\n        if eval_pipeline:\n            dataset.show(results, args.show_dir, pipeline=eval_pipeline)\n        else:\n            dataset.show(results, args.show_dir)  # use default pipeline\n    else:\n        raise NotImplementedError(\n            'Show is not implemented for dataset {}!'.format(\n                type(dataset).__name__))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/convert_h3dnet_checkpoints.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport tempfile\n\nimport torch\nfrom mmcv import Config\nfrom mmcv.runner import load_state_dict\n\nfrom mmdet3d.models import build_detector\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet3D upgrade model version(before v0.6.0) of H3DNet')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='path of the output checkpoint file')\n    args = parser.parse_args()\n    return args\n\n\ndef parse_config(config_strings):\n    \"\"\"Parse config from strings.\n\n    Args:\n        config_strings (string): strings of model config.\n\n    Returns:\n        Config: model config\n    \"\"\"\n    temp_file = tempfile.NamedTemporaryFile()\n    config_path = f'{temp_file.name}.py'\n    with open(config_path, 'w') as f:\n        f.write(config_strings)\n\n    config = Config.fromfile(config_path)\n\n    # Update backbone config\n    if 'pool_mod' in config.model.backbone.backbones:\n        config.model.backbone.backbones.pop('pool_mod')\n\n    if 'sa_cfg' not in config.model.backbone:\n        config.model.backbone['sa_cfg'] = dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)\n\n    if 'type' not in config.model.rpn_head.vote_aggregation_cfg:\n        config.model.rpn_head.vote_aggregation_cfg['type'] = 'PointSAModule'\n\n    # Update rpn_head config\n    if 'pred_layer_cfg' not in config.model.rpn_head:\n        config.model.rpn_head['pred_layer_cfg'] = dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True)\n\n    if 'feat_channels' in config.model.rpn_head:\n        config.model.rpn_head.pop('feat_channels')\n\n    if 'vote_moudule_cfg' in config.model.rpn_head:\n        config.model.rpn_head['vote_module_cfg'] = config.model.rpn_head.pop(\n            'vote_moudule_cfg')\n\n    if config.model.rpn_head.vote_aggregation_cfg.use_xyz:\n        config.model.rpn_head.vote_aggregation_cfg.mlp_channels[0] -= 3\n\n    for cfg in config.model.roi_head.primitive_list:\n        cfg['vote_module_cfg'] = cfg.pop('vote_moudule_cfg')\n        cfg.vote_aggregation_cfg.mlp_channels[0] -= 3\n        if 'type' not in cfg.vote_aggregation_cfg:\n            cfg.vote_aggregation_cfg['type'] = 'PointSAModule'\n\n    if 'type' not in config.model.roi_head.bbox_head.suface_matching_cfg:\n        config.model.roi_head.bbox_head.suface_matching_cfg[\n            'type'] = 'PointSAModule'\n\n    if config.model.roi_head.bbox_head.suface_matching_cfg.use_xyz:\n        config.model.roi_head.bbox_head.suface_matching_cfg.mlp_channels[\n            0] -= 3\n\n    if 'type' not in config.model.roi_head.bbox_head.line_matching_cfg:\n        config.model.roi_head.bbox_head.line_matching_cfg[\n            'type'] = 'PointSAModule'\n\n    if config.model.roi_head.bbox_head.line_matching_cfg.use_xyz:\n        config.model.roi_head.bbox_head.line_matching_cfg.mlp_channels[0] -= 3\n\n    if 'proposal_module_cfg' in config.model.roi_head.bbox_head:\n        config.model.roi_head.bbox_head.pop('proposal_module_cfg')\n\n    temp_file.close()\n\n    return config\n\n\ndef main():\n    \"\"\"Convert keys in checkpoints for VoteNet.\n\n    There can be some breaking changes during the development of mmdetection3d,\n    and this tool is used for upgrading checkpoints trained with old versions\n    (before v0.6.0) to the latest one.\n    \"\"\"\n    args = parse_args()\n   
 checkpoint = torch.load(args.checkpoint)\n    cfg = parse_config(checkpoint['meta']['config'])\n    # Build the model and load checkpoint\n    model = build_detector(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    orig_ckpt = checkpoint['state_dict']\n    converted_ckpt = orig_ckpt.copy()\n\n    if cfg['dataset_type'] == 'ScanNetDataset':\n        NUM_CLASSES = 18\n    elif cfg['dataset_type'] == 'SUNRGBDDataset':\n        NUM_CLASSES = 10\n    else:\n        raise NotImplementedError\n\n    RENAME_PREFIX = {\n        'rpn_head.conv_pred.0': 'rpn_head.conv_pred.shared_convs.layer0',\n        'rpn_head.conv_pred.1': 'rpn_head.conv_pred.shared_convs.layer1'\n    }\n\n    DEL_KEYS = [\n        'rpn_head.conv_pred.0.bn.num_batches_tracked',\n        'rpn_head.conv_pred.1.bn.num_batches_tracked'\n    ]\n\n    EXTRACT_KEYS = {\n        'rpn_head.conv_pred.conv_cls.weight':\n        ('rpn_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]),\n        'rpn_head.conv_pred.conv_cls.bias':\n        ('rpn_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]),\n        'rpn_head.conv_pred.conv_reg.weight':\n        ('rpn_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]),\n        'rpn_head.conv_pred.conv_reg.bias':\n        ('rpn_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)])\n    }\n\n    # Delete some useless keys\n    for key in DEL_KEYS:\n        converted_ckpt.pop(key)\n\n    # Rename keys with specific prefix\n    RENAME_KEYS = dict()\n    for old_key in converted_ckpt.keys():\n        for rename_prefix in RENAME_PREFIX.keys():\n            if rename_prefix in old_key:\n                new_key = old_key.replace(rename_prefix,\n                                          RENAME_PREFIX[rename_prefix])\n                RENAME_KEYS[new_key] = old_key\n    for new_key, old_key in RENAME_KEYS.items():\n        converted_ckpt[new_key] = converted_ckpt.pop(old_key)\n\n    # Extract weights and rename the keys\n    for new_key, (old_key, indices) in EXTRACT_KEYS.items():\n        cur_layers = orig_ckpt[old_key]\n        converted_layers = []\n        for (start, end) in indices:\n            if end != -1:\n                converted_layers.append(cur_layers[start:end])\n            else:\n                converted_layers.append(cur_layers[start:])\n        converted_layers = torch.cat(converted_layers, 0)\n        converted_ckpt[new_key] = converted_layers\n        if old_key in converted_ckpt.keys():\n            converted_ckpt.pop(old_key)\n\n    # Check the converted checkpoint by loading to the model\n    load_state_dict(model, converted_ckpt, strict=True)\n    checkpoint['state_dict'] = converted_ckpt\n    torch.save(checkpoint, args.out)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/convert_votenet_checkpoints.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport tempfile\n\nimport torch\nfrom mmcv import Config\nfrom mmcv.runner import load_state_dict\n\nfrom mmdet3d.models import build_detector\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet3D upgrade model version(before v0.6.0) of VoteNet')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='path of the output checkpoint file')\n    args = parser.parse_args()\n    return args\n\n\ndef parse_config(config_strings):\n    \"\"\"Parse config from strings.\n\n    Args:\n        config_strings (string): strings of model config.\n\n    Returns:\n        Config: model config\n    \"\"\"\n    temp_file = tempfile.NamedTemporaryFile()\n    config_path = f'{temp_file.name}.py'\n    with open(config_path, 'w') as f:\n        f.write(config_strings)\n\n    config = Config.fromfile(config_path)\n\n    # Update backbone config\n    if 'pool_mod' in config.model.backbone:\n        config.model.backbone.pop('pool_mod')\n\n    if 'sa_cfg' not in config.model.backbone:\n        config.model.backbone['sa_cfg'] = dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)\n\n    if 'type' not in config.model.bbox_head.vote_aggregation_cfg:\n        config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule'\n\n    # Update bbox_head config\n    if 'pred_layer_cfg' not in config.model.bbox_head:\n        config.model.bbox_head['pred_layer_cfg'] = dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True)\n\n    if 'feat_channels' in config.model.bbox_head:\n        config.model.bbox_head.pop('feat_channels')\n\n    if 'vote_moudule_cfg' in config.model.bbox_head:\n        config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop(\n            'vote_moudule_cfg')\n\n    if config.model.bbox_head.vote_aggregation_cfg.use_xyz:\n        config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3\n\n    temp_file.close()\n\n    return config\n\n\ndef main():\n    \"\"\"Convert keys in checkpoints for VoteNet.\n\n    There can be some breaking changes during the development of mmdetection3d,\n    and this tool is used for upgrading checkpoints trained with old versions\n    (before v0.6.0) to the latest one.\n    \"\"\"\n    args = parse_args()\n    checkpoint = torch.load(args.checkpoint)\n    cfg = parse_config(checkpoint['meta']['config'])\n    # Build the model and load checkpoint\n    model = build_detector(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    orig_ckpt = checkpoint['state_dict']\n    converted_ckpt = orig_ckpt.copy()\n\n    if cfg['dataset_type'] == 'ScanNetDataset':\n        NUM_CLASSES = 18\n    elif cfg['dataset_type'] == 'SUNRGBDDataset':\n        NUM_CLASSES = 10\n    else:\n        raise NotImplementedError\n\n    RENAME_PREFIX = {\n        'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0',\n        'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1'\n    }\n\n    DEL_KEYS = [\n        'bbox_head.conv_pred.0.bn.num_batches_tracked',\n        'bbox_head.conv_pred.1.bn.num_batches_tracked'\n    ]\n\n    EXTRACT_KEYS = {\n        'bbox_head.conv_pred.conv_cls.weight':\n        ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]),\n        'bbox_head.conv_pred.conv_cls.bias':\n        
('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]),\n        'bbox_head.conv_pred.conv_reg.weight':\n        ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]),\n        'bbox_head.conv_pred.conv_reg.bias':\n        ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)])\n    }\n\n    # Delete some useless keys\n    for key in DEL_KEYS:\n        converted_ckpt.pop(key)\n\n    # Rename keys with specific prefix\n    RENAME_KEYS = dict()\n    for old_key in converted_ckpt.keys():\n        for rename_prefix in RENAME_PREFIX.keys():\n            if rename_prefix in old_key:\n                new_key = old_key.replace(rename_prefix,\n                                          RENAME_PREFIX[rename_prefix])\n                RENAME_KEYS[new_key] = old_key\n    for new_key, old_key in RENAME_KEYS.items():\n        converted_ckpt[new_key] = converted_ckpt.pop(old_key)\n\n    # Extract weights and rename the keys\n    for new_key, (old_key, indices) in EXTRACT_KEYS.items():\n        cur_layers = orig_ckpt[old_key]\n        converted_layers = []\n        for (start, end) in indices:\n            if end != -1:\n                converted_layers.append(cur_layers[start:end])\n            else:\n                converted_layers.append(cur_layers[start:])\n        converted_layers = torch.cat(converted_layers, 0)\n        converted_ckpt[new_key] = converted_layers\n        if old_key in converted_ckpt.keys():\n            converted_ckpt.pop(old_key)\n\n    # Check the converted checkpoint by loading to the model\n    load_state_dict(model, converted_ckpt, strict=True)\n    checkpoint['state_dict'] = converted_ckpt\n    torch.save(checkpoint, args.out)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/publish_model.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport subprocess\n\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Process a checkpoint to be published')\n    parser.add_argument('in_file', help='input checkpoint filename')\n    parser.add_argument('out_file', help='output checkpoint filename')\n    args = parser.parse_args()\n    return args\n\n\ndef process_checkpoint(in_file, out_file):\n    checkpoint = torch.load(in_file, map_location='cpu')\n    # remove optimizer for smaller file size\n    if 'optimizer' in checkpoint:\n        del checkpoint['optimizer']\n    # if it is necessary to remove some sensitive data in checkpoint['meta'],\n    # add the code here.\n    torch.save(checkpoint, out_file)\n    sha = subprocess.check_output(['sha256sum', out_file]).decode()\n    final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])\n    subprocess.Popen(['mv', out_file, final_file])\n\n\ndef main():\n    args = parse_args()\n    process_checkpoint(args.in_file, args.out_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/regnet2mmdet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nfrom collections import OrderedDict\n\nimport torch\n\n\ndef convert_stem(model_key, model_weight, state_dict, converted_names):\n    new_key = model_key.replace('stem.conv', 'conv1')\n    new_key = new_key.replace('stem.bn', 'bn1')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n    print(f'Convert {model_key} to {new_key}')\n\n\ndef convert_head(model_key, model_weight, state_dict, converted_names):\n    new_key = model_key.replace('head.fc', 'fc')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n    print(f'Convert {model_key} to {new_key}')\n\n\ndef convert_reslayer(model_key, model_weight, state_dict, converted_names):\n    split_keys = model_key.split('.')\n    layer, block, module = split_keys[:3]\n    block_id = int(block[1:])\n    layer_name = f'layer{int(layer[1:])}'\n    block_name = f'{block_id - 1}'\n\n    if block_id == 1 and module == 'bn':\n        new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'\n    elif block_id == 1 and module == 'proj':\n        new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'\n    elif module == 'f':\n        if split_keys[3] == 'a_bn':\n            module_name = 'bn1'\n        elif split_keys[3] == 'b_bn':\n            module_name = 'bn2'\n        elif split_keys[3] == 'c_bn':\n            module_name = 'bn3'\n        elif split_keys[3] == 'a':\n            module_name = 'conv1'\n        elif split_keys[3] == 'b':\n            module_name = 'conv2'\n        elif split_keys[3] == 'c':\n            module_name = 'conv3'\n        new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'\n    else:\n        raise ValueError(f'Unsupported conversion of key {model_key}')\n    print(f'Convert {model_key} to {new_key}')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n\n\ndef convert(src, dst):\n    \"\"\"Convert keys in pycls pretrained RegNet models to mmdet style.\"\"\"\n    # load caffe model\n    regnet_model = torch.load(src)\n    blobs = regnet_model['model_state']\n    # convert to pytorch style\n    state_dict = OrderedDict()\n    converted_names = set()\n    for key, weight in blobs.items():\n        if 'stem' in key:\n            convert_stem(key, weight, state_dict, converted_names)\n        elif 'head' in key:\n            convert_head(key, weight, state_dict, converted_names)\n        elif key.startswith('s'):\n            convert_reslayer(key, weight, state_dict, converted_names)\n\n    # check if all layers are converted\n    for key in blobs:\n        if key not in converted_names:\n            print(f'not converted: {key}')\n    # save checkpoint\n    checkpoint = dict()\n    checkpoint['state_dict'] = state_dict\n    torch.save(checkpoint, dst)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Convert model keys')\n    parser.add_argument('src', help='src detectron model path')\n    parser.add_argument('dst', help='save path')\n    args = parser.parse_args()\n    convert(args.src, args.dst)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/slurm_test.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nCHECKPOINT=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nPY_ARGS=${@:5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "tools/slurm_train.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nPY_ARGS=${@:5}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "tools/test.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport warnings\n\nimport mmcv\nimport torch\nfrom mmcv import Config, DictAction\nfrom mmcv.cnn import fuse_conv_bn\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (get_dist_info, init_dist, load_checkpoint,\n                         wrap_fp16_model)\n\nimport mmdet\nfrom mmdet3d.apis import single_gpu_test\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_model\nfrom mmdet.apis import multi_gpu_test, set_random_seed\nfrom mmdet3d.apis.test import custom_multi_gpu_test\nfrom mmdet.datasets import replace_ImageToTensor\nimport os.path as osp\nimport time\nif mmdet.__version__ > '2.23.0':\n    # If mmdet version > 2.23.0, setup_multi_processes would be imported and\n    # used from mmdet instead of mmdet3d.\n    from mmdet.utils import setup_multi_processes\nelse:\n    from mmdet3d.utils import setup_multi_processes\n\ntry:\n    # If mmdet version > 2.23.0, compat_cfg would be imported and\n    # used from mmdet instead of mmdet3d.\n    from mmdet.utils import compat_cfg\nexcept ImportError:\n    from mmdet3d.utils import compat_cfg\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet test (and eval) a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='output result file in pickle format')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--gpu-ids',\n        type=int,\n        nargs='+',\n        help='(Deprecated, please use --gpu-id) ids of gpus to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument(\n        '--gpu-id',\n        type=int,\n        default=0,\n        help='id of gpu to use '\n        '(only applicable to non-distributed testing)')\n    parser.add_argument(\n        '--format-only',\n        action='store_true',\n        help='Format the output results without perform evaluation. 
It is'\n        'useful when you want to format the result to a specific format and '\n        'submit it to the test server')\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        help='evaluation metrics, which depends on the dataset, e.g., \"bbox\",'\n        ' \"segm\", \"proposal\" for COCO, and \"mAP\", \"recall\" for PASCAL VOC')\n    parser.add_argument(\n        '--save',\n        action='store_true',\n        help='save occupancy_data')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--show-dir', help='directory where results will be saved')\n    parser.add_argument(\n        '--tag', \n        default='',\n        help='tags')\n    parser.add_argument(\n        '--gpu-collect',\n        action='store_true',\n        help='whether to use gpu to collect results.')\n    parser.add_argument(\n        '--no-aavt',\n        action='store_true',\n        help='Do not align after view transformer.')\n    parser.add_argument(\n        '--tmpdir',\n        help='tmp directory used for collecting results from multiple '\n        'workers, available when gpu-collect is not specified')\n    parser.add_argument('--seed', type=int, default=0, help='random seed')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function (deprecate), '\n        'change to --eval-options instead.')\n    parser.add_argument(\n        '--eval-options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.eval_options:\n        raise ValueError(\n            '--options and --eval-options cannot be both specified, '\n            '--options is deprecated in favor of --eval-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --eval-options')\n        args.eval_options = args.options\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    # assert args.out or args.eval or args.format_only or args.show \\\n    #     or args.show_dir, \\\n    #     ('Please specify at least one operation (save/eval/format/show the '\n    #      'results / save the results) with the argument \"--out\", \"--eval\"'\n    #      ', \"--format-only\", \"--show\" or \"--show-dir\"')\n\n    if args.eval and args.format_only:\n        raise ValueError('--eval and --format_only cannot be both specified')\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    cfg = compat_cfg(cfg)\n\n    # set multi-process settings\n    setup_multi_processes(cfg)\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    \n    # each process may have different time\n    out_dir =  osp.join('test', args.config.split('/')[-1][:-3]+ '_' + str(args.tag),  time.ctime().replace(' ','_').replace(':','_'))[:-8]\n\n    if args.save:\n        cfg.model.occupancy_save_path = out_dir\n        mmcv.mkdir_or_exist(out_dir)\n        mmcv.mkdir_or_exist(os.path.join(out_dir, 'occupancy_pred'))\n\n    cfg.model.pretrained = None\n\n    if args.gpu_ids is not None:\n        cfg.gpu_ids = args.gpu_ids[0:1]\n        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '\n                      'Because we only support single GPU mode in '\n                      'non-distributed testing. 
Use the first GPU '\n                      'in `gpu_ids` now.')\n    else:\n        cfg.gpu_ids = [args.gpu_id]\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n\n    test_dataloader_default_args = dict(\n        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)\n\n    # in case the test dataset is concatenated\n    if isinstance(cfg.data.test, dict):\n        cfg.data.test.test_mode = True\n        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.test.pipeline = replace_ImageToTensor(\n                cfg.data.test.pipeline)\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            ds_cfg.test_mode = True\n        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:\n            for ds_cfg in cfg.data.test:\n                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)\n\n    test_loader_cfg = {\n        **test_dataloader_default_args,\n        **cfg.data.get('test_dataloader', {})\n    }\n\n    # set random seeds\n    if args.seed is not None:\n        set_random_seed(args.seed, deterministic=args.deterministic)\n\n    # build the dataloader\n\n    dataset = build_dataset(cfg.data.test)\n    \n    \n    \n    data_loader = build_dataloader(dataset, **test_loader_cfg)\n\n    # build the model and load checkpoint\n    if not args.no_aavt:\n        if '4D' in cfg.model.type:\n            cfg.model.align_after_view_transfromation=True\n    cfg.model.train_cfg = None\n    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu', revise_keys=[(r'^module\\.', ''), (r'^teacher\\.', '')])\n    if args.fuse_conv_bn:\n        model = fuse_conv_bn(model)\n    # old versions did not save class info in checkpoints, this walkaround is\n    # for backward compatibility\n\n\n    sync_bn = cfg.get('sync_bn', False)\n    if distributed and sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n        print('Convert to SyncBatchNorm')\n\n    if 'CLASSES' in checkpoint.get('meta', {}):\n        model.CLASSES = checkpoint['meta']['CLASSES']\n    else:\n        model.CLASSES = dataset.CLASSES\n    # palette for visualization in segmentation tasks\n    if 'PALETTE' in checkpoint.get('meta', {}):\n        model.PALETTE = checkpoint['meta']['PALETTE']\n    elif hasattr(dataset, 'PALETTE'):\n        # segmentation dataset has `PALETTE` attribute\n        model.PALETTE = dataset.PALETTE\n\n    if not distributed:\n        model = MMDataParallel(model, device_ids=cfg.gpu_ids)\n        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)\n    else:\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False)\n        \n\n        if cfg.get('use_custom_gpu_test', True):\n            outputs = custom_multi_gpu_test(model, data_loader, args.tmpdir,\n                                 args.gpu_collect)\n        else:\n            outputs = multi_gpu_test(model, data_loader, args.tmpdir,\n                             args.gpu_collect)              \n\n   
 rank, _ = get_dist_info()\n\n    \n    if rank == 0:\n        if args.out:\n            print(f'\\nwriting results to {args.out}')\n            mmcv.dump(outputs, args.out)\n        kwargs = {} if args.eval_options is None else args.eval_options\n        kwargs['jsonfile_prefix'] = out_dir\n        if args.format_only:\n            dataset.format_results(outputs, **kwargs)\n        if True:\n            eval_kwargs = cfg.get('evaluation', {}).copy()\n            # kwargs['save'] =  args.save\n            # hard-code way to remove EvalHook args\n            for key in [\n                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',\n                    'rule'\n            ]:\n                eval_kwargs.pop(key, None)\n            eval_kwargs.update(dict(metric=args.eval, **kwargs))\n            print(dataset.evaluate(outputs, **eval_kwargs))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/train.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom __future__ import division\nimport argparse\nimport copy\nimport os\nimport time\nimport warnings\nfrom os import path as osp\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv import Config, DictAction\nfrom mmcv.runner import get_dist_info, init_dist\n\nfrom mmdet import __version__ as mmdet_version\nfrom mmdet3d import __version__ as mmdet3d_version\nfrom mmdet3d.apis import init_random_seed, train_model\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet3d.models import build_model\nfrom mmdet3d.utils import collect_env, get_root_logger\nfrom mmdet.apis import set_random_seed\nfrom mmseg import __version__ as mmseg_version\nfrom collections import OrderedDict\nimport torch.multiprocessing as mp\n\n\n\ntry:\n    # If mmdet version > 2.20.0, setup_multi_processes would be imported and\n    # used from mmdet instead of mmdet3d.\n    from mmdet.utils import setup_multi_processes\nexcept ImportError:\n    from mmdet3d.utils import setup_multi_processes\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument('--work-dir', help='the dir to save logs and models')\n    parser.add_argument(\n        '--resume-from', help='the checkpoint file to resume from')\n    parser.add_argument(\n        '--auto-resume',\n        action='store_true',\n        help='resume from the latest checkpoint automatically')\n    parser.add_argument(\n        '--no-validate',\n        action='store_true',\n        help='whether not to evaluate the checkpoint during training')\n    group_gpus = parser.add_mutually_exclusive_group()\n    group_gpus.add_argument(\n        '--gpus',\n        type=int,\n        help='(Deprecated, please use --gpu-id) number of gpus to use '\n        '(only applicable to non-distributed training)')\n    group_gpus.add_argument(\n        '--gpu-ids',\n        type=int,\n        nargs='+',\n        help='(Deprecated, please use --gpu-id) ids of gpus to use '\n        '(only applicable to non-distributed training)')\n    group_gpus.add_argument(\n        '--gpu-id',\n        type=int,\n        default=0,\n        help='number of gpus to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument('--seed', type=int, default=0, help='random seed')\n    parser.add_argument(\n        '--diff-seed',\n        action='store_true',\n        help='Whether or not set different seeds for different ranks')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file (deprecate), '\n        'change to --cfg-options instead.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    parser.add_argument(\n        '--autoscale-lr',\n        action='store_true',\n        help='automatically scale lr with the number of gpus')\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.cfg_options:\n        raise ValueError(\n            '--options and --cfg-options cannot be both specified, '\n            '--options is deprecated in favor of --cfg-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --cfg-options')\n        args.cfg_options = args.options\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    # set multi-process settings\n    setup_multi_processes(cfg)\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    # work_dir is determined in this priority: CLI > segment in file > filename\n    if args.work_dir is not None:\n        # update configs according to CLI args if args.work_dir is not None\n        cfg.work_dir = args.work_dir\n    elif cfg.get('work_dir', None) is None:\n        # use config filename as default work_dir if cfg.work_dir is None\n        cfg.work_dir = osp.join('./work_dirs',\n                                osp.splitext(osp.basename(args.config))[0])\n    if osp.isfile(osp.join(cfg.work_dir, 'done.txt')):\n        print('job has finished, designed for NVIDIA ORD')\n        exit(1)\n\n    if args.resume_from is not None and osp.isfile(args.resume_from):\n        cfg.resume_from = args.resume_from\n    if cfg.resume_from is not None and not osp.isfile(cfg.resume_from):\n        cfg.resume_from = None\n\n    if args.auto_resume:\n        cfg.auto_resume = args.auto_resume\n        warnings.warn('`--auto-resume` is only supported when mmdet'\n                      'version >= 2.20.0 for 3D detection model or'\n                      'mmsegmentation verision >= 0.21.0 for 3D'\n                      'segmentation model')\n\n    if args.gpus is not None:\n        cfg.gpu_ids = range(1)\n        warnings.warn('`--gpus` is deprecated because we only support '\n                      'single GPU mode in non-distributed training. '\n                      'Use `gpus=1` now.')\n    if args.gpu_ids is not None:\n        cfg.gpu_ids = args.gpu_ids[0:1]\n        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '\n                      'Because we only support single GPU mode in '\n                      'non-distributed training. 
Use the first GPU '\n                      'in `gpu_ids` now.')\n    if args.gpus is None and args.gpu_ids is None:\n        cfg.gpu_ids = [args.gpu_id]\n\n    if args.autoscale_lr:\n        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)\n        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n        rank = 0\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n        # re-set gpu_ids with distributed training mode\n        rank, world_size = get_dist_info()\n        cfg.gpu_ids = range(world_size)\n        gpu = rank % torch.cuda.device_count()\n        os.environ['LOCAL_RANK'] = str(gpu)\n\n    for each in cfg.log_config['hooks']:\n        if each['type'] == 'WandbLoggerHook':\n            each['init_kwargs']['name'] = args.config.split('/')[-1]\n            each['init_kwargs']['config'] = dict()\n            each['init_kwargs']['resume'] = 'allow'\n            each['init_kwargs']['config']['job_id'] = os.environ.get('HOSTNAME','None')\n\n            each['init_kwargs']['config']['link'] = dict()\n            for key in ['model', 'lr_config', 'load_from', 'fp16', 'optimizer', 'data', 'train_pipeline', 'data_config']:\n                each['init_kwargs']['config'][key] = dict(cfg._cfg_dict).get(key, 'None')\n            break\n\n    # create work_dir\n    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n    # dump config\n    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))\n    # init the logger before other steps\n    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())\n    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')\n    # specify logger name, if we still use 'mmdet', the output info will be\n    # filtered and won't be saved in the log_file\n    # TODO: ugly workaround to judge whether we are training det or seg model\n    if cfg.model.type in ['EncoderDecoder3D']:\n        logger_name = 'mmseg'\n    else:\n        logger_name = 'mmdet'\n    logger = get_root_logger(\n        log_file=log_file, log_level=cfg.log_level, name=logger_name)\n\n    # init the meta dict to record some important information such as\n    # environment info and seed, which will be logged\n    meta = dict()\n    # log env info\n    env_info_dict = collect_env()\n    env_info = '\\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])\n    dash_line = '-' * 60 + '\\n'\n    logger.info('Environment info:\\n' + dash_line + env_info + '\\n' +\n                dash_line)\n    meta['env_info'] = env_info\n    meta['config'] = cfg.pretty_text\n\n    # log some basic info\n    logger.info(f'Distributed training: {distributed}')\n    logger.info(f'Config:\\n{cfg.pretty_text}')\n\n    # set random seeds\n    seed = init_random_seed(args.seed)\n    seed = seed + dist.get_rank() if args.diff_seed else seed\n    logger.info(f'Set random seed to {seed}, '\n                f'deterministic: {args.deterministic}')\n    set_random_seed(seed, deterministic=args.deterministic)\n    cfg.seed = seed\n    meta['seed'] = seed\n    meta['exp_name'] = osp.basename(args.config)\n\n    model = build_model(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    model.init_weights()\n\n    sync_bn = cfg.get('sync_bn', False)\n    if distributed and sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n        
print('Convert to SyncBatchNorm')\n\n    if 'trainable_components' in cfg:\n        logger.info('params to update: %s', cfg['trainable_components'])\n        for key in cfg['trainable_components']:\n            for name, param in model.named_parameters():\n                if key not in name:\n                    param.requires_grad = False\n\n        from torch import nn\n        def fix_bn(m):\n            if isinstance(m, nn.BatchNorm1d) or isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.SyncBatchNorm):\n                m.track_running_stats = False\n        model.img_backbone.apply(fix_bn)\n        model.img_neck.apply(fix_bn)\n        model.depth_net.apply(fix_bn)\n        model.forward_projection.apply(fix_bn)\n        model.img_bev_encoder_backbone.apply(fix_bn)\n        model.img_bev_encoder_neck.apply(fix_bn)\n        model.pts_bbox_head.apply(fix_bn)\n        # model.pts_bbox_head.ego_fut_decoder.apply(fix_bn)\n        # model.pts_voxel_layer.apply(fix_bn)\n        # model.pts_voxel_encoder.apply(fix_bn)\n        # model.pts_middle_encoder.apply(fix_bn)\n        # model.pts_backbone.apply(fix_bn)\n        # model.pts_neck.apply(fix_bn)\n\n        \n    logger.info(f'Model:\\n{model}')\n    datasets = [build_dataset(cfg.data.train)]\n    if len(cfg.workflow) == 2:\n        val_dataset = copy.deepcopy(cfg.data.val)\n        # in case we use a dataset wrapper\n        if 'dataset' in cfg.data.train:\n            val_dataset.pipeline = cfg.data.train.dataset.pipeline\n        else:\n            val_dataset.pipeline = cfg.data.train.pipeline\n        # set test_mode=False here in deep copied config\n        # which do not affect AP/AR calculation later\n        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa\n        val_dataset.test_mode = False\n        datasets.append(build_dataset(val_dataset))\n    if cfg.checkpoint_config is not None:\n        # save mmdet version, config file content and class names in\n        # checkpoints as meta data\n        cfg.checkpoint_config.meta = dict(\n            mmdet_version=mmdet_version,\n            mmseg_version=mmseg_version,\n            mmdet3d_version=mmdet3d_version,\n            config=cfg.pretty_text,\n            CLASSES=datasets[0].CLASSES,\n            PALETTE=datasets[0].PALETTE  # for segmentors\n            if hasattr(datasets[0], 'PALETTE') else None)\n    # add an attribute for visualization convenience\n    model.CLASSES = datasets[0].CLASSES\n    torch.backends.cuda.matmul.allow_tf32 = True\n    torch.backends.cudnn.allow_tf32 = True\n    train_model(\n        model,\n        datasets,\n        cfg,\n        distributed=distributed,\n        validate=(not args.no_validate),\n        timestamp=timestamp,\n        meta=meta)\n    with open(osp.join(cfg.work_dir, 'done.txt'), 'w') as f:\n        f.write('done: ' + time.ctime())\n\n\nif __name__ == '__main__':\n    mp.set_start_method('spawn')\n    main()\n"
  },
  {
    "path": "tools/update_data_coords.py",
    "content": "import argparse\nimport time\nfrom os import path as osp\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet3d.core.bbox import limit_period\n\n\ndef update_sunrgbd_infos(root_dir, out_dir, pkl_files):\n    print(f'{pkl_files} will be modified because '\n          f'of the refactor of the Depth coordinate system.')\n    if root_dir == out_dir:\n        print(f'Warning, you are overwriting '\n              f'the original data under {root_dir}.')\n        time.sleep(3)\n    for pkl_file in pkl_files:\n        in_path = osp.join(root_dir, pkl_file)\n        print(f'Reading from input file: {in_path}.')\n        a = mmcv.load(in_path)\n        print('Start updating:')\n        for item in mmcv.track_iter_progress(a):\n            if 'rotation_y' in item['annos']:\n                item['annos']['rotation_y'] = -item['annos']['rotation_y']\n                item['annos']['gt_boxes_upright_depth'][:, -1:] = \\\n                    -item['annos']['gt_boxes_upright_depth'][:, -1:]\n\n        out_path = osp.join(out_dir, pkl_file)\n        print(f'Writing to output file: {out_path}.')\n        mmcv.dump(a, out_path, 'pkl')\n\n\ndef update_outdoor_dbinfos(root_dir, out_dir, pkl_files):\n    print(f'{pkl_files} will be modified because '\n          f'of the refactor of the LIDAR coordinate system.')\n    if root_dir == out_dir:\n        print(f'Warning, you are overwriting '\n              f'the original data under {root_dir}.')\n        time.sleep(3)\n    for pkl_file in pkl_files:\n        in_path = osp.join(root_dir, pkl_file)\n        print(f'Reading from input file: {in_path}.')\n        a = mmcv.load(in_path)\n        print('Start updating:')\n        for k in a.keys():\n            print(f'Updating samples of class {k}:')\n            for item in mmcv.track_iter_progress(a[k]):\n                boxes = item['box3d_lidar'].copy()\n                # swap l, w (or dx, dy)\n                item['box3d_lidar'][3] = boxes[4]\n                item['box3d_lidar'][4] = boxes[3]\n                # change yaw\n                item['box3d_lidar'][6] = -boxes[6] - np.pi / 2\n                item['box3d_lidar'][6] = limit_period(\n                    item['box3d_lidar'][6], period=np.pi * 2)\n\n        out_path = osp.join(out_dir, pkl_file)\n        print(f'Writing to output file: {out_path}.')\n        mmcv.dump(a, out_path, 'pkl')\n\n\ndef update_nuscenes_or_lyft_infos(root_dir, out_dir, pkl_files):\n\n    print(f'{pkl_files} will be modified because '\n          f'of the refactor of the LIDAR coordinate system.')\n    if root_dir == out_dir:\n        print(f'Warning, you are overwriting '\n              f'the original data under {root_dir}.')\n        time.sleep(3)\n    for pkl_file in pkl_files:\n        in_path = osp.join(root_dir, pkl_file)\n        print(f'Reading from input file: {in_path}.')\n        a = mmcv.load(in_path)\n        print('Start updating:')\n        for item in mmcv.track_iter_progress(a['infos']):\n            boxes = item['gt_boxes'].copy()\n            # swap l, w (or dx, dy)\n            item['gt_boxes'][:, 3] = boxes[:, 4]\n            item['gt_boxes'][:, 4] = boxes[:, 3]\n            # change yaw\n            item['gt_boxes'][:, 6] = -boxes[:, 6] - np.pi / 2\n            item['gt_boxes'][:, 6] = limit_period(\n                item['gt_boxes'][:, 6], period=np.pi * 2)\n\n        out_path = osp.join(out_dir, pkl_file)\n        print(f'Writing to output file: {out_path}.')\n        mmcv.dump(a, out_path, 'pkl')\n\n\nparser = argparse.ArgumentParser(description='Arg 
parser for data coords '\n                                 'update due to coords sys refactor.')\nparser.add_argument('dataset', metavar='kitti', help='name of the dataset')\nparser.add_argument(\n    '--root-dir',\n    type=str,\n    default='./data/kitti',\n    help='specify the root dir of dataset')\nparser.add_argument(\n    '--version',\n    type=str,\n    default='v1.0',\n    required=False,\n    help='specify the dataset version, no need for kitti')\nparser.add_argument(\n    '--out-dir',\n    type=str,\n    default=None,\n    required=False,\n    help='specify the output dir for the updated info pkls (defaults to --root-dir)')\nargs = parser.parse_args()\n\nif __name__ == '__main__':\n    if args.out_dir is None:\n        args.out_dir = args.root_dir\n    if args.dataset == 'kitti':\n        # KITTI infos are in the CAM coord sys (unchanged)\n        # KITTI dbinfos are in the LIDAR coord sys (changed)\n        # so we only update dbinfos\n        pkl_files = ['kitti_dbinfos_train.pkl']\n        update_outdoor_dbinfos(\n            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)\n    elif args.dataset == 'nuscenes':\n        # nuScenes infos are in the LIDAR coord sys (changed)\n        # nuScenes dbinfos are in the LIDAR coord sys (changed)\n        # so we update both infos and dbinfos\n        pkl_files = ['nuscenes_infos_val.pkl']\n        if args.version != 'v1.0-mini':\n            pkl_files.append('nuscenes_infos_train.pkl')\n        else:\n            pkl_files.append('nuscenes_infos_train_tiny.pkl')\n        update_nuscenes_or_lyft_infos(\n            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)\n        if args.version != 'v1.0-mini':\n            pkl_files = ['nuscenes_dbinfos_train.pkl']\n            update_outdoor_dbinfos(\n                root_dir=args.root_dir,\n                out_dir=args.out_dir,\n                pkl_files=pkl_files)\n    elif args.dataset == 'lyft':\n        # Lyft infos are in the LIDAR coord sys (changed)\n        # Lyft has no dbinfos\n        # so we update infos\n        pkl_files = ['lyft_infos_train.pkl', 'lyft_infos_val.pkl']\n        update_nuscenes_or_lyft_infos(\n            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)\n    elif args.dataset == 'waymo':\n        # Waymo infos are in the CAM coord sys (unchanged)\n        # Waymo dbinfos are in the LIDAR coord sys (changed)\n        # so we only update dbinfos\n        pkl_files = ['waymo_dbinfos_train.pkl']\n        update_outdoor_dbinfos(\n            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)\n    elif args.dataset == 'scannet':\n        # ScanNet infos are in the DEPTH coord sys (changed)\n        # but the bboxes have no yaw\n        # so ScanNet is unaffected\n        pass\n    elif args.dataset == 's3dis':\n        # Segmentation datasets are not affected\n        pass\n    elif args.dataset == 'sunrgbd':\n        # SUNRGBD infos are in the DEPTH coord sys (changed)\n        # and the bboxes have yaw\n        # so we update infos\n        pkl_files = ['sunrgbd_infos_train.pkl', 'sunrgbd_infos_val.pkl']\n        update_sunrgbd_infos(\n            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)\n"
  },
  {
    "path": "tools/update_data_coords.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\nexport PYTHONPATH=`pwd`:$PYTHONPATH\n\nPARTITION=$1\nDATASET=$2\nGPUS=${GPUS:-1}\nGPUS_PER_NODE=${GPUS_PER_NODE:-1}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nJOB_NAME=update_data_coords\n\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/update_data_coords.py ${DATASET} \\\n            --root-dir ./data/${DATASET} \\\n            --out-dir ./data/${DATASET}\n"
  }
]