Repository: valeoai/xmuda
Branch: master
Commit: f368389d89b3
Files: 52
Total size: 214.1 KB

Directory structure:
gitextract_vpjbm8lv/
├── .gitignore
├── LICENSE
├── README.md
├── configs/
│   ├── a2d2_semantic_kitti/
│   │   ├── baseline.yaml
│   │   ├── xmuda.yaml
│   │   └── xmuda_pl.yaml
│   └── nuscenes/
│       ├── day_night/
│       │   ├── baseline.yaml
│       │   ├── xmuda.yaml
│       │   └── xmuda_pl.yaml
│       └── usa_singapore/
│           ├── baseline.yaml
│           ├── xmuda.yaml
│           └── xmuda_pl.yaml
├── setup.py
└── xmuda/
    ├── common/
    │   ├── config/
    │   │   ├── __init__.py
    │   │   └── base.py
    │   ├── solver/
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   └── lr_scheduler.py
    │   └── utils/
    │       ├── checkpoint.py
    │       ├── io.py
    │       ├── logger.py
    │       ├── metric_logger.py
    │       ├── sampler.py
    │       └── torch_util.py
    ├── config/
    │   └── xmuda.py
    ├── data/
    │   ├── a2d2/
    │   │   ├── a2d2_dataloader.py
    │   │   ├── preprocess.py
    │   │   └── splits.py
    │   ├── build.py
    │   ├── collate.py
    │   ├── nuscenes/
    │   │   ├── nuscenes_dataloader.py
    │   │   ├── preprocess.py
    │   │   ├── projection.py
    │   │   └── splits.py
    │   ├── semantic_kitti/
    │   │   ├── preprocess.py
    │   │   ├── semantic_kitti_dataloader.py
    │   │   └── splits.py
    │   └── utils/
    │       ├── augmentation_3d.py
    │       ├── evaluate.py
    │       ├── refine_pseudo_labels.py
    │       ├── turbo_cmap.py
    │       ├── validate.py
    │       └── visualize.py
    ├── models/
    │   ├── build.py
    │   ├── losses.py
    │   ├── metric.py
    │   ├── resnet34_unet.py
    │   ├── scn_unet.py
    │   └── xmuda_arch.py
    ├── test.py
    ├── train_baseline.py
    └── train_xmuda.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# compilation and distribution
__pycache__
_ext
*.pyc
*.so
build/
dist/
*.egg-info/

# Pycharm editor settings
.idea

================================================
FILE: LICENSE
================================================
xMUDA

Copyright 2020 Valeo

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Apache License
Version 2.0, January 2004
https://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:

(a) You must give any other recipients of the Work or Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.

You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.

8. Limitation of Liability.
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

================================================
FILE: README.md
================================================
## [Updated code](https://github.com/valeoai/xmuda_journal) from our TPAMI paper.

# xMUDA: Cross-Modal Unsupervised Domain Adaptation for 3D Semantic Segmentation
Official code for the paper.

## Paper
![](./teaser.png)

[xMUDA: Cross-Modal Unsupervised Domain Adaptation for 3D Semantic Segmentation](https://arxiv.org/abs/1911.12676)
[Maximilian Jaritz](https://team.inria.fr/rits/membres/maximilian-jaritz/), [Tuan-Hung Vu](https://tuanhungvu.github.io/), [Raoul de Charette](https://team.inria.fr/rits/membres/raoul-de-charette/), Émilie Wirbel, [Patrick Pérez](https://ptrckprz.github.io/)
Inria, valeo.ai
CVPR 2020

If you find this code useful for your research, please cite our [paper](https://arxiv.org/abs/1911.12676):
```
@inproceedings{jaritz2019xmuda,
    title={{xMUDA}: Cross-Modal Unsupervised Domain Adaptation for {3D} Semantic Segmentation},
    author={Jaritz, Maximilian and Vu, Tuan-Hung and de Charette, Raoul and Wirbel, Emilie and P{\'e}rez, Patrick},
    booktitle={CVPR},
    year={2020}
}
```

## Preparation
### Prerequisites
Tested with
* PyTorch 1.4
* CUDA 10.0
* Python 3.8
* [SparseConvNet](https://github.com/facebookresearch/SparseConvNet)
* [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit)

### Installation
As the 3D network we use SparseConvNet. It requires CUDA 10.0 (it did not work with 10.1 when we tried). We advise creating a new conda environment for the installation. PyTorch and CUDA can be installed, and SparseConvNet installed/compiled, as follows:
```
$ conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
$ pip install --upgrade git+https://github.com/facebookresearch/SparseConvNet.git
```

Clone this repository and install it with pip. It will automatically install the nuscenes-devkit as a dependency.
```
$ git clone https://github.com/valeoai/xmuda.git
$ cd xmuda
$ pip install -ve .
```
The `-e` option means that you can edit the code on the fly.

### Datasets
#### NuScenes
Please download the Full dataset (v1.0) from the [NuScenes website](https://www.nuscenes.org) and extract it.

You need to perform preprocessing to generate the data for xMUDA first. The preprocessing subsamples the 360° LiDAR point cloud to keep only the points that project into the front camera image. It also generates the point-wise segmentation labels from the 3D object annotations, by checking which points lie inside the 3D boxes (a conceptual sketch of this step is shown below). All information is stored in a pickle file, except the images, which are read frame by frame by the dataloader during training.

Please edit the script `xmuda/data/nuscenes/preprocess.py` as follows and then run it.
* `root_dir` should point to the root directory of the NuScenes dataset
* `out_dir` should point to the desired output directory to store the pickle files
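For intuition, the box-based labeling step described above boils down to a point-in-box test per annotation. Below is a rough, hypothetical sketch using the nuscenes-devkit helper `points_in_box`; the helper is a real devkit function, but the surrounding function and variable names are made up and this is not the repo's actual `preprocess.py`:
```
import numpy as np
from nuscenes.utils.geometry_utils import points_in_box

def label_points_with_boxes(points, boxes, box_class_indices, default_label=0):
    """Give each LiDAR point the class index of a 3D box containing it.

    points: (N, 3) array in the same reference frame as the boxes.
    boxes: list of nuscenes.utils.data_classes.Box objects.
    box_class_indices: one class index per box.
    """
    labels = np.full(len(points), default_label, dtype=np.int64)
    for box, cls_idx in zip(boxes, box_class_indices):
        # points_in_box expects the points as a 3 x N array
        mask = points_in_box(box, points.T)
        labels[mask] = cls_idx
    return labels
```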
#### A2D2
Please download the Semantic Segmentation dataset and Sensor Configuration from the [Audi website](https://www.a2d2.audi/a2d2/en/download.html) or directly use `wget` with the following links, then extract.
```
$ wget https://aev-autonomous-driving-dataset.s3.eu-central-1.amazonaws.com/camera_lidar_semantic.tar
$ wget https://aev-autonomous-driving-dataset.s3.eu-central-1.amazonaws.com/cams_lidars.json
```

The dataset directory should have this basic structure:
```
a2d2                    % A2D2 dataset root
├── 20180807_145028
├── 20180810_142822
├── ...
├── cams_lidars.json
└── class_list.json
```

For preprocessing, we undistort the images and store them separately as .png files. Similar to the NuScenes preprocessing, we save all points that project into the front camera image, as well as the segmentation labels, to a pickle file.

Please edit the script `xmuda/data/a2d2/preprocess.py` as follows and then run it.
* `root_dir` should point to the root directory of the A2D2 dataset
* `out_dir` should point to the desired output directory to store the undistorted images and pickle files. It should be different from `root_dir` to prevent overwriting the original images.

#### SemanticKITTI
Please download the files from the [SemanticKITTI website](http://semantic-kitti.org/dataset.html) and additionally the [color data](http://www.cvlibs.net/download.php?file=data_odometry_color.zip) from the [Kitti Odometry website](http://www.cvlibs.net/datasets/kitti/eval_odometry.php). Extract everything into the same folder.

Similar to the NuScenes preprocessing, we save all points that project into the front camera image, as well as the segmentation labels, to a pickle file.

Please edit the script `xmuda/data/semantic_kitti/preprocess.py` as follows and then run it.
* `root_dir` should point to the root directory of the SemanticKITTI dataset
* `out_dir` should point to the desired output directory to store the pickle files

## Training
### xMUDA
You can run the training with
```
$ cd <root dir of this repo>
$ python xmuda/train_xmuda.py --cfg=configs/nuscenes/usa_singapore/xmuda.yaml
```

The output will be written to `/home/<user>/workspace/outputs/xmuda/<config_path>` by default. The `OUTPUT_DIR` can be modified in the config file (e.g. in `configs/nuscenes/usa_singapore/xmuda.yaml`) or optionally at run time on the command line (the command line takes precedence over the config file). Note that `@` in the example after the sketch below will be automatically replaced with the config path, i.e. with `nuscenes/usa_singapore/xmuda`.
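For intuition, the `@` substitution presumably boils down to a simple string replacement when the config is loaded (a hypothetical sketch, not the repo's actual code; the real logic lives in the training scripts):
```
import os.path as osp

def expand_output_dir(output_dir, config_path):
    # e.g. config_path = 'configs/nuscenes/usa_singapore/xmuda.yaml'
    # turns '@' into 'nuscenes/usa_singapore/xmuda'
    config_name = osp.splitext(osp.relpath(config_path, 'configs'))[0]
    return output_dir.replace('@', config_name)
```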
```
$ python xmuda/train_xmuda.py --cfg=configs/nuscenes/usa_singapore/xmuda.yaml OUTPUT_DIR path/to/output/directory/@
```

You can start the trainings on the other UDA scenarios (Day/Night and A2D2/SemanticKITTI) analogously:
```
$ python xmuda/train_xmuda.py --cfg=configs/nuscenes/day_night/xmuda.yaml
$ python xmuda/train_xmuda.py --cfg=configs/a2d2_semantic_kitti/xmuda.yaml
```

### xMUDA_PL
After having trained the xMUDA model, generate the pseudo-labels as follows:
```
$ python xmuda/test.py --cfg=configs/nuscenes/usa_singapore/xmuda.yaml --pselab @/model_2d_100000.pth @/model_3d_100000.pth DATASET_TARGET.TEST "('train_singapore',)"
```
Note that we use the last model at 100,000 steps, rather than the best validation checkpoint, to avoid the indirect supervision from the validation set that picking the best weights would introduce. The pseudo-labels and maximum probabilities are saved as an `.npy` file.

Please edit `pselab_paths` in the config file, e.g. `configs/nuscenes/usa_singapore/xmuda_pl.yaml`, to match the path of your generated pseudo-labels.

Then start the training. The pseudo-label refinement (discarding the less confident pseudo-labels) is carried out when the dataloader is initialized (a conceptual sketch is given at the end of this README).
```
$ python xmuda/train_xmuda.py --cfg=configs/nuscenes/usa_singapore/xmuda_pl.yaml
```

You can start the trainings on the other UDA scenarios (Day/Night and A2D2/SemanticKITTI) analogously:
```
$ python xmuda/test.py --cfg=configs/nuscenes/day_night/xmuda.yaml --pselab @/model_2d_100000.pth @/model_3d_100000.pth DATASET_TARGET.TEST "('train_night',)"
$ python xmuda/train_xmuda.py --cfg=configs/nuscenes/day_night/xmuda_pl.yaml

# use batch size 1 for SemanticKITTI, because of the different image sizes
$ python xmuda/test.py --cfg=configs/a2d2_semantic_kitti/xmuda.yaml --pselab @/model_2d_100000.pth @/model_3d_100000.pth DATASET_TARGET.TEST "('train',)" VAL.BATCH_SIZE 1
$ python xmuda/train_xmuda.py --cfg=configs/a2d2_semantic_kitti/xmuda_pl.yaml
```

### Baseline
Train the baselines (trained on source only) with:
```
$ python xmuda/train_baseline.py --cfg=configs/nuscenes/usa_singapore/baseline.yaml
$ python xmuda/train_baseline.py --cfg=configs/nuscenes/day_night/baseline.yaml
$ python xmuda/train_baseline.py --cfg=configs/a2d2_semantic_kitti/baseline.yaml
```

## Testing
You can specify which checkpoints to use for testing. We used the ones that performed best on the validation set during training (the best validation iteration for 2D and 3D is shown at the end of each training). Note that `@` will be replaced by the output directory for that config file. For example:
```
$ cd <root dir of this repo>
$ python xmuda/test.py --cfg=configs/nuscenes/usa_singapore/xmuda.yaml @/model_2d_065000.pth @/model_3d_095000.pth
```
You can also provide an absolute path without `@`.

## Model Zoo
You can download the models with the scores below from [this Google drive folder](https://drive.google.com/drive/folders/16MTKz4LOIwqQc3Vo6LAGrpiIC72hvggc?usp=sharing).

| Method                 | USA/Singapore 2D | USA/Singapore 3D | Day/Night 2D | Day/Night 3D | A2D2/Sem.KITTI 2D | A2D2/Sem.KITTI 3D |
| ---                    | ---              | ---              | ---          | ---          | ---               | ---               |
| Baseline (source only) | 53.4             | 46.5             | 42.2         | 41.2         | 34.2*             | 35.9*             |
| xMUDA                  | 59.3             | 52.0             | 46.2         | 44.2         | 38.3*             | 46.0*             |
| xMUDA_PL               | 61.1             | 54.1             | 47.1         | 46.7         | 41.2*             | 49.8*             |

\* Slight differences from the paper on A2D2/Sem.KITTI: we now use class weights computed on the source domain; in the paper, we mistakenly computed the class weights on the target domain.

## Acknowledgements
Note that this code borrows from the [MVPNet](https://github.com/maxjaritz/mvpnet) repo.

## License
xMUDA is released under the [Apache 2.0 license](./LICENSE).
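As a closing pointer, the pseudo-label refinement mentioned in the xMUDA_PL section above is implemented in `xmuda/data/utils/refine_pseudo_labels.py` (that file is not reproduced in this extract). Conceptually, it keeps only the most confident pseudo-labels of each class. The function below is a minimal, hypothetical sketch of that idea; the names and the `keep_ratio` parameter are made up and do not mirror the repo's actual API:
```
import numpy as np

def refine_pseudo_labels(probs, pseudo_labels, keep_ratio=0.9, ignore_label=-100):
    """Per class, keep only the `keep_ratio` most confident pseudo-labels.

    probs: (N,) maximum softmax probability per point (as stored in the .npy file).
    pseudo_labels: (N,) predicted class index per point.
    """
    pseudo_labels = pseudo_labels.copy()
    for cls in np.unique(pseudo_labels):
        idx = np.nonzero(pseudo_labels == cls)[0]
        # class-wise confidence threshold below which pseudo-labels are discarded
        thresh = np.quantile(probs[idx], 1.0 - keep_ratio)
        pseudo_labels[idx[probs[idx] < thresh]] = ignore_label
    return pseudo_labels
```
Points mapped to the ignore index are then simply excluded from the segmentation loss during xMUDA_PL training.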
================================================
FILE: configs/a2d2_semantic_kitti/baseline.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  NUM_CLASSES: 10
MODEL_3D:
  TYPE: "SCN"
  NUM_CLASSES: 10
DATASET_SOURCE:
  TYPE: "A2D2SCN"
  TRAIN: ("train",)
  A2D2SCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/a2d2_preprocess"
DATASET_TARGET:
  TYPE: "SemanticKITTISCN"
  VAL: ("val",)
  TEST: ("test",)
  SemanticKITTISCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/semantic_kitti_preprocess/preprocess"
    semantic_kitti_dir: "/datasets_local/datasets_mjaritz/semantic_kitti_preprocess"
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [1.89090012, 2.0585112, 3.1970535, 3.1111633, 1., 2.93751704, 1.92053733, 1.47886874, 1.04654198, 1.78266561]
VAL:
  BATCH_SIZE: 8
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda

================================================
FILE: configs/a2d2_semantic_kitti/xmuda.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  DUAL_HEAD: True
  NUM_CLASSES: 10
MODEL_3D:
  TYPE: "SCN"
  DUAL_HEAD: True
  NUM_CLASSES: 10
DATASET_SOURCE:
  TYPE: "A2D2SCN"
  TRAIN: ("train",)
  A2D2SCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/a2d2_preprocess"
DATASET_TARGET:
  TYPE: "SemanticKITTISCN"
  TRAIN: ("train",)
  VAL: ("val",)
  TEST: ("test",)
  SemanticKITTISCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/semantic_kitti_preprocess/preprocess"
    semantic_kitti_dir: "/datasets_local/datasets_mjaritz/semantic_kitti_preprocess"
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [1.89090012, 2.0585112, 3.1970535, 3.1111633, 1., 2.93751704, 1.92053733, 1.47886874, 1.04654198, 1.78266561]
  XMUDA:
    lambda_xm_src: 0.1
    lambda_xm_trg: 0.01
VAL:
  BATCH_SIZE: 2
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda
================================================
FILE: configs/a2d2_semantic_kitti/xmuda_pl.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  DUAL_HEAD: True
  NUM_CLASSES: 10
MODEL_3D:
  TYPE: "SCN"
  DUAL_HEAD: True
  NUM_CLASSES: 10
DATASET_SOURCE:
  TYPE: "A2D2SCN"
  TRAIN: ("train",)
  A2D2SCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/a2d2_preprocess"
DATASET_TARGET:
  TYPE: "SemanticKITTISCN"
  TRAIN: ("train",)
  VAL: ("val",)
  TEST: ("test",)
  SemanticKITTISCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/semantic_kitti_preprocess/preprocess"
    semantic_kitti_dir: "/datasets_local/datasets_mjaritz/semantic_kitti_preprocess"
    pselab_paths: ("/home/docker_user/workspace/outputs/xmuda/a2d2_semantic_kitti/xmuda/pselab_data/train.npy",)
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [1.89090012, 2.0585112, 3.1970535, 3.1111633, 1., 2.93751704, 1.92053733, 1.47886874, 1.04654198, 1.78266561]
  XMUDA:
    lambda_xm_src: 0.1
    lambda_xm_trg: 0.01
    lambda_pl: 1.0
VAL:
  BATCH_SIZE: 2
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda

================================================
FILE: configs/nuscenes/day_night/baseline.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
MODEL_3D:
  TYPE: "SCN"
DATASET_SOURCE:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_day",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATASET_TARGET:
  TYPE: "NuScenesSCN"
  VAL: ("val_night",)
  TEST: ("test_night",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [2.68678412, 4.36182969, 5.47896839, 3.89026883, 1.]
VAL:
  BATCH_SIZE: 32
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda
================================================
FILE: configs/nuscenes/day_night/xmuda.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  DUAL_HEAD: True
MODEL_3D:
  TYPE: "SCN"
  DUAL_HEAD: True
DATASET_SOURCE:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_day",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATASET_TARGET:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_night",)
  VAL: ("val_night",)
  TEST: ("test_night",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [2.68678412, 4.36182969, 5.47896839, 3.89026883, 1.]
  XMUDA:
    lambda_xm_src: 1.0
    lambda_xm_trg: 0.1
VAL:
  BATCH_SIZE: 32
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda

================================================
FILE: configs/nuscenes/day_night/xmuda_pl.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  DUAL_HEAD: True
MODEL_3D:
  TYPE: "SCN"
  DUAL_HEAD: True
DATASET_SOURCE:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_day",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATASET_TARGET:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_night",)
  VAL: ("val_night",)
  TEST: ("test_night",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
    pselab_paths: ("/home/docker_user/workspace/outputs/xmuda/nuscenes/day_night/xmuda/pselab_data/train_night.npy",)
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [2.68678412, 4.36182969, 5.47896839, 3.89026883, 1.]
  XMUDA:
    lambda_xm_src: 1.0
    lambda_xm_trg: 0.1
    lambda_pl: 1.0
VAL:
  BATCH_SIZE: 32
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda
================================================
FILE: configs/nuscenes/usa_singapore/baseline.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
MODEL_3D:
  TYPE: "SCN"
DATASET_SOURCE:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_usa",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATASET_TARGET:
  TYPE: "NuScenesSCN"
  VAL: ("val_singapore",)
  TEST: ("test_singapore",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [2.47956584, 4.26788384, 5.71114131, 3.80241668, 1.]
VAL:
  BATCH_SIZE: 32
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda

================================================
FILE: configs/nuscenes/usa_singapore/xmuda.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  DUAL_HEAD: True
MODEL_3D:
  TYPE: "SCN"
  DUAL_HEAD: True
DATASET_SOURCE:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_usa",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATASET_TARGET:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_singapore",)
  VAL: ("val_singapore",)
  TEST: ("test_singapore",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [2.47956584, 4.26788384, 5.71114131, 3.80241668, 1.]
  XMUDA:
    lambda_xm_src: 1.0
    lambda_xm_trg: 0.1
VAL:
  BATCH_SIZE: 32
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda
================================================
FILE: configs/nuscenes/usa_singapore/xmuda_pl.yaml
================================================
MODEL_2D:
  TYPE: "UNetResNet34"
  DUAL_HEAD: True
MODEL_3D:
  TYPE: "SCN"
  DUAL_HEAD: True
DATASET_SOURCE:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_usa",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
DATASET_TARGET:
  TYPE: "NuScenesSCN"
  TRAIN: ("train_singapore",)
  VAL: ("val_singapore",)
  TEST: ("test_singapore",)
  NuScenesSCN:
    preprocess_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess"
    nuscenes_dir: "/datasets_local/datasets_mjaritz/nuscenes_preprocess"  # only front cam images are needed
    pselab_paths: ("/home/docker_user/workspace/outputs/xmuda/nuscenes/usa_singapore/xmuda/pselab_data/train_singapore.npy",)
DATALOADER:
  NUM_WORKERS: 4
OPTIMIZER:
  TYPE: "Adam"
  BASE_LR: 0.001
SCHEDULER:
  TYPE: "MultiStepLR"
  MultiStepLR:
    gamma: 0.1
    milestones: (80000, 90000)
  MAX_ITERATION: 100000
TRAIN:
  BATCH_SIZE: 8
  SUMMARY_PERIOD: 50
  CHECKPOINT_PERIOD: 5000
  CLASS_WEIGHTS: [2.47956584, 4.26788384, 5.71114131, 3.80241668, 1.]
  XMUDA:
    lambda_xm_src: 1.0
    lambda_xm_trg: 0.1
    lambda_pl: 1.0
VAL:
  BATCH_SIZE: 32
  PERIOD: 5000
#OUTPUT_DIR: "path/to/output/directory/@"  # @ will be replaced with config path, e.g. nuscenes/usa_singapore/xmuda

================================================
FILE: setup.py
================================================
from setuptools import setup
from setuptools import find_packages

exclude_dirs = ("configs",)

# for install, do: pip install -ve .
setup(
    name='xmuda',
    version="0.0.1",
    url="https://github.com/maxjaritz/xmuda",
    description="xMUDA: Cross-Modal Unsupervised Domain Adaptation for 3D Semantic Segmentation",
    install_requires=['yacs', 'nuscenes-devkit', 'tabulate'],
    packages=find_packages(exclude=exclude_dirs),
)

================================================
FILE: xmuda/common/config/__init__.py
================================================
from yacs.config import CfgNode


def purge_cfg(cfg: CfgNode):
    """Purge the configuration for clean logs and logical checks.

    If a CfgNode has a 'TYPE' attribute, all its CfgNode children whose key
    does not match the value of 'TYPE' are removed.
    """
    target_key = cfg.get('TYPE', None)
    removed_keys = []
    for k, v in cfg.items():
        if isinstance(v, CfgNode):
            if target_key is not None and (k != target_key):
                removed_keys.append(k)
            else:
                purge_cfg(v)
    for k in removed_keys:
        del cfg[k]

================================================
FILE: xmuda/common/config/base.py
================================================
"""Basic experiments configuration

For different tasks, a specific configuration might be created by importing this basic config.
"""
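# For example, xmuda/config/xmuda.py (further below in this extract) extends
# this base config roughly as follows:
#
#   from xmuda.common.config.base import CN, _C
#   cfg = _C                       # public alias
#   _C.VAL.METRIC = 'seg_iou'      # override a base default
#   _C.TRAIN.CLASS_WEIGHTS = []    # add task-specific options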
""" from yacs.config import CfgNode as CN # ---------------------------------------------------------------------------- # # Config definition # ---------------------------------------------------------------------------- # _C = CN() # ---------------------------------------------------------------------------- # # Resume # ---------------------------------------------------------------------------- # # Automatically resume weights from last checkpoints _C.AUTO_RESUME = True # Whether to resume the optimizer and the scheduler _C.RESUME_STATES = True # Path of weights to resume _C.RESUME_PATH = '' # ---------------------------------------------------------------------------- # # Model # ---------------------------------------------------------------------------- # _C.MODEL = CN() _C.MODEL.TYPE = '' # ---------------------------------------------------------------------------- # # DataLoader # ---------------------------------------------------------------------------- # _C.DATALOADER = CN() # Number of data loading threads _C.DATALOADER.NUM_WORKERS = 0 # Whether to drop last _C.DATALOADER.DROP_LAST = True # ---------------------------------------------------------------------------- # # Optimizer # ---------------------------------------------------------------------------- # _C.OPTIMIZER = CN() _C.OPTIMIZER.TYPE = '' # Basic parameters of the optimizer # Note that the learning rate should be changed according to batch size _C.OPTIMIZER.BASE_LR = 0.001 _C.OPTIMIZER.WEIGHT_DECAY = 0.0 # Specific parameters of optimizers _C.OPTIMIZER.SGD = CN() _C.OPTIMIZER.SGD.momentum = 0.9 _C.OPTIMIZER.SGD.dampening = 0.0 _C.OPTIMIZER.Adam = CN() _C.OPTIMIZER.Adam.betas = (0.9, 0.999) # ---------------------------------------------------------------------------- # # Scheduler (learning rate schedule) # ---------------------------------------------------------------------------- # _C.SCHEDULER = CN() _C.SCHEDULER.TYPE = '' _C.SCHEDULER.MAX_ITERATION = 1 # Minimum learning rate. 0.0 for disable. _C.SCHEDULER.CLIP_LR = 0.0 # Specific parameters of schedulers _C.SCHEDULER.StepLR = CN() _C.SCHEDULER.StepLR.step_size = 0 _C.SCHEDULER.StepLR.gamma = 0.1 _C.SCHEDULER.MultiStepLR = CN() _C.SCHEDULER.MultiStepLR.milestones = () _C.SCHEDULER.MultiStepLR.gamma = 0.1 # ---------------------------------------------------------------------------- # # Specific train options # ---------------------------------------------------------------------------- # _C.TRAIN = CN() # Batch size _C.TRAIN.BATCH_SIZE = 1 # Period to save checkpoints. 0 for disable _C.TRAIN.CHECKPOINT_PERIOD = 0 # Period to log training status. 0 for disable _C.TRAIN.LOG_PERIOD = 50 # Period to summary training status. 0 for disable _C.TRAIN.SUMMARY_PERIOD = 0 # Max number of checkpoints to keep _C.TRAIN.MAX_TO_KEEP = 100 # Regex patterns of modules and/or parameters to freeze _C.TRAIN.FROZEN_PATTERNS = () # ---------------------------------------------------------------------------- # # Specific validation options # ---------------------------------------------------------------------------- # _C.VAL = CN() # Batch size _C.VAL.BATCH_SIZE = 1 # Period to validate. 0 for disable _C.VAL.PERIOD = 0 # Period to log validation status. 
_C.VAL.LOG_PERIOD = 20
# The metric for best validation performance
_C.VAL.METRIC = ''

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# if set to @, the filename of config will be used by default
_C.OUTPUT_DIR = '@'
# For reproducibility...but not really because modern fast GPU libraries use
# non-deterministic op implementations
# -1 means use time seed.
_C.RNG_SEED = 1

================================================
FILE: xmuda/common/solver/__init__.py
================================================

================================================
FILE: xmuda/common/solver/build.py
================================================
"""Build optimizers and schedulers"""
import warnings
import torch
from .lr_scheduler import ClipLR


def build_optimizer(cfg, model):
    name = cfg.OPTIMIZER.TYPE
    if name == '':
        warnings.warn('No optimizer is built.')
        return None
    elif hasattr(torch.optim, name):
        return getattr(torch.optim, name)(
            model.parameters(),
            lr=cfg.OPTIMIZER.BASE_LR,
            weight_decay=cfg.OPTIMIZER.WEIGHT_DECAY,
            **cfg.OPTIMIZER.get(name, dict()),
        )
    else:
        raise ValueError('Unsupported type of optimizer.')


def build_scheduler(cfg, optimizer):
    name = cfg.SCHEDULER.TYPE
    if name == '':
        warnings.warn('No scheduler is built.')
        return None
    elif hasattr(torch.optim.lr_scheduler, name):
        scheduler = getattr(torch.optim.lr_scheduler, name)(
            optimizer,
            **cfg.SCHEDULER.get(name, dict()),
        )
    else:
        raise ValueError('Unsupported type of scheduler.')

    # clip learning rate
    if cfg.SCHEDULER.CLIP_LR > 0.0:
        print('Learning rate is clipped to {}'.format(cfg.SCHEDULER.CLIP_LR))
        scheduler = ClipLR(scheduler, min_lr=cfg.SCHEDULER.CLIP_LR)

    return scheduler

================================================
FILE: xmuda/common/solver/lr_scheduler.py
================================================
from __future__ import division
from bisect import bisect_right

from torch.optim.lr_scheduler import _LRScheduler, MultiStepLR


class WarmupMultiStepLR(_LRScheduler):
    """https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/solver/lr_scheduler.py"""

    def __init__(
            self,
            optimizer,
            milestones,
            gamma=0.1,
            warmup_factor=0.1,
            warmup_steps=1,
            warmup_method="linear",
            last_epoch=-1,
    ):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, got {}".format(warmup_method)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_steps = warmup_steps
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup_factor = 1
        if self.last_epoch < self.warmup_steps:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                alpha = float(self.last_epoch) / self.warmup_steps
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        return [
            base_lr
            * warmup_factor
            * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]


class ClipLR(object):
    """Clip the learning rate of a given scheduler.
    The same interfaces as _LRScheduler should be implemented.

    Args:
        scheduler (_LRScheduler): an instance of _LRScheduler.
        min_lr (float): minimum learning rate.
    """
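    # Hypothetical usage (cf. build_scheduler in xmuda/common/solver/build.py):
    #   scheduler = MultiStepLR(optimizer, milestones=[80000, 90000], gamma=0.1)
    #   scheduler = ClipLR(scheduler, min_lr=1e-5)
    # get_lr() then floors every learning rate at min_lr.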
""" def __init__(self, scheduler, min_lr=1e-5): assert isinstance(scheduler, _LRScheduler) self.scheduler = scheduler self.min_lr = min_lr def get_lr(self): return [max(self.min_lr, lr) for lr in self.scheduler.get_lr()] def __getattr__(self, item): if hasattr(self.scheduler, item): return getattr(self.scheduler, item) else: return getattr(self, item) ================================================ FILE: xmuda/common/utils/checkpoint.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Modified by Jiayuan Gu import os import logging import torch from torch.nn.parallel import DataParallel, DistributedDataParallel from .io import get_md5 class Checkpointer(object): """Checkpoint the model and relevant states. Supported features: 1. Resume optimizer and scheduler 2. Automatically deal with DataParallel, DistributedDataParallel 3. Resume last saved checkpoint """ def __init__(self, model, optimizer=None, scheduler=None, save_dir='', logger=None, postfix='' ): self.model = model self.optimizer = optimizer self.scheduler = scheduler self.save_dir = save_dir # logging self.logger = logger self._print = logger.info if logger else print self.postfix = postfix def save(self, name, tag=True, **kwargs): if not self.save_dir: return data = dict() if isinstance(self.model, (DataParallel, DistributedDataParallel)): data['model'] = self.model.module.state_dict() else: data['model'] = self.model.state_dict() if self.optimizer is not None: data['optimizer'] = self.optimizer.state_dict() if self.scheduler is not None: data['scheduler'] = self.scheduler.state_dict() data.update(kwargs) save_file = os.path.join(self.save_dir, '{}.pth'.format(name)) self._print('Saving checkpoint to {}'.format(os.path.abspath(save_file))) torch.save(data, save_file) if tag: self.tag_last_checkpoint(save_file) def load(self, path=None, resume=True, resume_states=True): if resume and self.has_checkpoint(): # override argument with existing checkpoint path = self.get_checkpoint_file() if not path: # no checkpoint could be found self._print('No checkpoint found. 
            return {}
        self._print('Loading checkpoint from {}, MD5: {}'.format(path, get_md5(path)))
        checkpoint = self._load_file(path)
        if isinstance(self.model, (DataParallel, DistributedDataParallel)):
            self.model.module.load_state_dict(checkpoint.pop('model'))
        else:
            self.model.load_state_dict(checkpoint.pop('model'))
        if resume_states:
            if 'optimizer' in checkpoint and self.optimizer:
                self.logger.info('Loading optimizer from {}'.format(path))
                self.optimizer.load_state_dict(checkpoint.pop('optimizer'))
            if 'scheduler' in checkpoint and self.scheduler:
                self.logger.info('Loading scheduler from {}'.format(path))
                self.scheduler.load_state_dict(checkpoint.pop('scheduler'))
        else:
            checkpoint = {}
        # return any further checkpoint data
        return checkpoint

    def has_checkpoint(self):
        save_file = os.path.join(self.save_dir, 'last_checkpoint' + self.postfix)
        return os.path.exists(save_file)

    def get_checkpoint_file(self):
        save_file = os.path.join(self.save_dir, 'last_checkpoint' + self.postfix)
        try:
            with open(save_file, 'r') as f:
                last_saved = f.read()
            # If not absolute path, add save_dir as prefix
            if not os.path.isabs(last_saved):
                last_saved = os.path.join(self.save_dir, last_saved)
        except IOError:
            # If file doesn't exist, maybe because it has just been
            # deleted by a separate process
            last_saved = ''
        return last_saved

    def tag_last_checkpoint(self, last_filename):
        save_file = os.path.join(self.save_dir, 'last_checkpoint' + self.postfix)
        # If not absolute path, only save basename
        if not os.path.isabs(last_filename):
            last_filename = os.path.basename(last_filename)
        with open(save_file, 'w') as f:
            f.write(last_filename)

    def _load_file(self, path):
        return torch.load(path, map_location=torch.device('cpu'))


class CheckpointerV2(Checkpointer):
    """Support max_to_keep like tf.Saver"""

    def __init__(self, *args, max_to_keep=5, **kwargs):
        super(CheckpointerV2, self).__init__(*args, **kwargs)
        self.max_to_keep = max_to_keep
        self._last_checkpoints = []

    def get_checkpoint_file(self):
        save_file = os.path.join(self.save_dir, 'last_checkpoint' + self.postfix)
        try:
            self._last_checkpoints = self._load_last_checkpoints(save_file)
            last_saved = self._last_checkpoints[-1]
        except (IOError, IndexError):
            # If file doesn't exist, maybe because it has just been
            # deleted by a separate process
            last_saved = ''
        return last_saved

    def tag_last_checkpoint(self, last_filename):
        save_file = os.path.join(self.save_dir, 'last_checkpoint' + self.postfix)
        # Remove first from list if the same name was used before.
        for path in self._last_checkpoints:
            if last_filename == path:
                self._last_checkpoints.remove(path)
        # Append new path to list
        self._last_checkpoints.append(last_filename)
        # If more than max_to_keep, remove the oldest.
        self._delete_old_checkpoint()
        # Dump last checkpoints to a file
        self._save_checkpoint_file(save_file)

    def _delete_old_checkpoint(self):
        if len(self._last_checkpoints) > self.max_to_keep:
            path = self._last_checkpoints.pop(0)
            try:
                os.remove(path)
            except Exception as e:
                logging.warning("Ignoring: %s", str(e))

    def _save_checkpoint_file(self, path):
        with open(path, 'w') as f:
            lines = []
            for p in self._last_checkpoints:
                if not os.path.isabs(p):
                    # If not absolute path, only save basename
                    p = os.path.basename(p)
                lines.append(p)
            f.write('\n'.join(lines))

    def _load_last_checkpoints(self, path):
        last_checkpoints = []
        with open(path, 'r') as f:
            for p in f.readlines():
                # strip the trailing newline kept by readlines(), which would
                # otherwise end up inside the restored path
                p = p.strip()
                if not os.path.isabs(p):
                    # If not absolute path, add save_dir as prefix
                    p = os.path.join(self.save_dir, p)
                last_checkpoints.append(p)
        return last_checkpoints

================================================
FILE: xmuda/common/utils/io.py
================================================
import hashlib


def get_md5(filename):
    hash_obj = hashlib.md5()
    with open(filename, 'rb') as f:
        hash_obj.update(f.read())
    return hash_obj.hexdigest()

================================================
FILE: xmuda/common/utils/logger.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Modified by Jiayuan Gu
import logging
import os
import sys


def setup_logger(name, save_dir, comment=''):
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(stream=sys.stdout)
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    if save_dir:
        filename = 'log'
        if comment:
            filename += '.' + comment
        log_file = os.path.join(save_dir, filename + '.txt')
        fh = logging.FileHandler(log_file)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger

================================================
FILE: xmuda/common/utils/metric_logger.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Modified by Jiayuan Gu
from __future__ import division
from collections import defaultdict
from collections import deque

import numpy as np
import torch


class AverageMeter(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """
    default_fmt = '{avg:.4f} ({global_avg:.4f})'
    default_summary_fmt = '{global_avg:.4f}'

    def __init__(self, window_size=20, fmt=None, summary_fmt=None):
        self.values = deque(maxlen=window_size)
        self.counts = deque(maxlen=window_size)
        self.sum = 0.0
        self.count = 0
        self.fmt = fmt or self.default_fmt
        self.summary_fmt = summary_fmt or self.default_summary_fmt

    def update(self, value, count=1):
        self.values.append(value)
        self.counts.append(count)
        self.sum += value
        self.count += count

    @property
    def avg(self):
        return np.sum(self.values) / np.sum(self.counts)

    @property
    def global_avg(self):
        return self.sum / self.count if self.count != 0 else float('nan')

    def reset(self):
        self.values.clear()
        self.counts.clear()
        self.sum = 0.0
        self.count = 0

    def __str__(self):
        return self.fmt.format(avg=self.avg, global_avg=self.global_avg)

    @property
    def summary_str(self):
        return self.summary_fmt.format(global_avg=self.global_avg)
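
# Hypothetical usage of AverageMeter:
#   meter = AverageMeter(window_size=20)
#   for loss in (0.9, 0.7, 0.5):
#       meter.update(loss)
#   str(meter)  # -> '0.7000 (0.7000)', i.e. windowed average (global average)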

class MetricLogger(object):
    """Metric logger.

    All the meters should implement the following methods: __str__, summary_str, reset.
    """

    def __init__(self, delimiter='\t'):
        self.meters = defaultdict(AverageMeter)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                count = v.numel()
                value = v.item() if count == 1 else v.sum().item()
            elif isinstance(v, np.ndarray):
                count = v.size
                value = v.item() if count == 1 else v.sum().item()
            else:
                assert isinstance(v, (float, int))
                value = v
                count = 1
            self.meters[k].update(value, count)

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def add_meters(self, meters):
        if not isinstance(meters, (list, tuple)):
            meters = [meters]
        for meter in meters:
            self.add_meter(meter.name, meter)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        # note: falling back to getattr(self, attr) here would recurse forever,
        # since __getattr__ is only invoked after normal lookup has failed
        raise AttributeError(attr)

    def __str__(self):
        metric_str = []
        for name, meter in self.meters.items():
            metric_str.append('{}: {}'.format(name, str(meter)))
        return self.delimiter.join(metric_str)

    @property
    def summary_str(self):
        metric_str = []
        for name, meter in self.meters.items():
            metric_str.append('{}: {}'.format(name, meter.summary_str))
        return self.delimiter.join(metric_str)

    def reset(self):
        for meter in self.meters.values():
            meter.reset()

================================================
FILE: xmuda/common/utils/sampler.py
================================================
from torch.utils.data.sampler import Sampler


class IterationBasedBatchSampler(Sampler):
    """Wraps a BatchSampler, resampling from it until a specified number of
    iterations have been sampled.

    References:
        https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py
    """

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration < self.num_iterations:
            # if the underlying sampler has a set_epoch method, like
            # DistributedSampler, used for making each process see
            # a different split of the dataset, then set it
            if hasattr(self.batch_sampler.sampler, "set_epoch"):
                self.batch_sampler.sampler.set_epoch(iteration)
            for batch in self.batch_sampler:
                yield batch
                iteration += 1
                if iteration >= self.num_iterations:
                    break

    def __len__(self):
        return self.num_iterations - self.start_iter


def test_IterationBasedBatchSampler():
    from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler
    sampler = RandomSampler([i for i in range(9)])
    batch_sampler = BatchSampler(sampler, batch_size=2, drop_last=True)
    batch_sampler = IterationBasedBatchSampler(batch_sampler, 6, start_iter=0)

    # check __len__
    # assert len(batch_sampler) == 5
    for i, index in enumerate(batch_sampler):
        print(i, index)
        # assert [i * 2, i * 2 + 1] == index

    # # check start iter
    # batch_sampler.start_iter = 2
    # assert len(batch_sampler) == 3


if __name__ == '__main__':
    test_IterationBasedBatchSampler()

================================================
FILE: xmuda/common/utils/torch_util.py
================================================
import random

import numpy as np
import torch


def set_random_seed(seed):
    if seed < 0:
        return
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # torch.cuda.manual_seed_all(seed)


def worker_init_fn(worker_id):
    """The function is designed for the pytorch multi-process dataloader.
    Note that we use the pytorch random generator to generate a base_seed.
    Please try to be consistent.

    References:
        https://pytorch.org/docs/stable/notes/faq.html#dataloader-workers-random-seed
    """
    base_seed = torch.IntTensor(1).random_().item()
    # print(worker_id, base_seed)
    np.random.seed(base_seed + worker_id)

================================================
FILE: xmuda/config/xmuda.py
================================================
"""xMUDA experiments configuration"""
import os.path as osp

from xmuda.common.config.base import CN, _C

# public alias
cfg = _C

_C.VAL.METRIC = 'seg_iou'

# ---------------------------------------------------------------------------- #
# Specific train options
# ---------------------------------------------------------------------------- #
_C.TRAIN.CLASS_WEIGHTS = []

# ---------------------------------------------------------------------------- #
# xMUDA options
# ---------------------------------------------------------------------------- #
_C.TRAIN.XMUDA = CN()
_C.TRAIN.XMUDA.lambda_xm_src = 0.0
_C.TRAIN.XMUDA.lambda_xm_trg = 0.0
_C.TRAIN.XMUDA.lambda_pl = 0.0
_C.TRAIN.XMUDA.lambda_minent = 0.0
_C.TRAIN.XMUDA.lambda_logcoral = 0.0

# ---------------------------------------------------------------------------- #
# Datasets
# ---------------------------------------------------------------------------- #
_C.DATASET_SOURCE = CN()
_C.DATASET_SOURCE.TYPE = ''
_C.DATASET_SOURCE.TRAIN = tuple()

_C.DATASET_TARGET = CN()
_C.DATASET_TARGET.TYPE = ''
_C.DATASET_TARGET.TRAIN = tuple()
_C.DATASET_TARGET.VAL = tuple()
_C.DATASET_TARGET.TEST = tuple()

# NuScenesSCN
_C.DATASET_SOURCE.NuScenesSCN = CN()
_C.DATASET_SOURCE.NuScenesSCN.preprocess_dir = ''
_C.DATASET_SOURCE.NuScenesSCN.nuscenes_dir = ''
_C.DATASET_SOURCE.NuScenesSCN.merge_classes = True
# 3D
_C.DATASET_SOURCE.NuScenesSCN.scale = 20
_C.DATASET_SOURCE.NuScenesSCN.full_scale = 4096
# 2D
_C.DATASET_SOURCE.NuScenesSCN.use_image = True
_C.DATASET_SOURCE.NuScenesSCN.resize = (400, 225)
_C.DATASET_SOURCE.NuScenesSCN.image_normalizer = ()
# 3D augmentation
_C.DATASET_SOURCE.NuScenesSCN.augmentation = CN()
_C.DATASET_SOURCE.NuScenesSCN.augmentation.noisy_rot = 0.1
_C.DATASET_SOURCE.NuScenesSCN.augmentation.flip_x = 0.5
_C.DATASET_SOURCE.NuScenesSCN.augmentation.rot_z = 6.2831  # 2 * pi
_C.DATASET_SOURCE.NuScenesSCN.augmentation.transl = True
# 2D augmentation
_C.DATASET_SOURCE.NuScenesSCN.augmentation.fliplr = 0.5
_C.DATASET_SOURCE.NuScenesSCN.augmentation.color_jitter = (0.4, 0.4, 0.4)
# copy over the same arguments to target dataset settings
_C.DATASET_TARGET.NuScenesSCN = CN(_C.DATASET_SOURCE.NuScenesSCN)
_C.DATASET_TARGET.NuScenesSCN.pselab_paths = tuple()

# A2D2SCN
_C.DATASET_SOURCE.A2D2SCN = CN()
_C.DATASET_SOURCE.A2D2SCN.preprocess_dir = ''
_C.DATASET_SOURCE.A2D2SCN.merge_classes = True
# 3D
_C.DATASET_SOURCE.A2D2SCN.scale = 20
_C.DATASET_SOURCE.A2D2SCN.full_scale = 4096
# 2D
_C.DATASET_SOURCE.A2D2SCN.use_image = True
_C.DATASET_SOURCE.A2D2SCN.resize = (480, 302)
_C.DATASET_SOURCE.A2D2SCN.image_normalizer = ()
# 3D augmentation
_C.DATASET_SOURCE.A2D2SCN.augmentation = CN()
_C.DATASET_SOURCE.A2D2SCN.augmentation.noisy_rot = 0.1
_C.DATASET_SOURCE.A2D2SCN.augmentation.flip_y = 0.5
_C.DATASET_SOURCE.A2D2SCN.augmentation.rot_z = 6.2831  # 2 * pi
_C.DATASET_SOURCE.A2D2SCN.augmentation.transl = True
# 2D augmentation
_C.DATASET_SOURCE.A2D2SCN.augmentation.fliplr = 0.5
_C.DATASET_SOURCE.A2D2SCN.augmentation.color_jitter = (0.4, 0.4, 0.4)

# SemanticKITTISCN
_C.DATASET_SOURCE.SemanticKITTISCN = CN()
_C.DATASET_SOURCE.SemanticKITTISCN.preprocess_dir = ''
_C.DATASET_SOURCE.SemanticKITTISCN.semantic_kitti_dir = ''
_C.DATASET_SOURCE.SemanticKITTISCN.merge_classes = True
# 3D
_C.DATASET_SOURCE.SemanticKITTISCN.scale = 20
_C.DATASET_SOURCE.SemanticKITTISCN.full_scale = 4096
# 2D
_C.DATASET_SOURCE.SemanticKITTISCN.image_normalizer = ()
# 3D augmentation
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation = CN()
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.noisy_rot = 0.1
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.flip_y = 0.5
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.rot_z = 6.2831  # 2 * pi
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.transl = True
# 2D augmentation
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.bottom_crop = (480, 302)
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.fliplr = 0.5
_C.DATASET_SOURCE.SemanticKITTISCN.augmentation.color_jitter = (0.4, 0.4, 0.4)
# copy over the same arguments to target dataset settings
_C.DATASET_TARGET.SemanticKITTISCN = CN(_C.DATASET_SOURCE.SemanticKITTISCN)
_C.DATASET_TARGET.SemanticKITTISCN.pselab_paths = tuple()

# ---------------------------------------------------------------------------- #
# Model 2D
# ---------------------------------------------------------------------------- #
_C.MODEL_2D = CN()
_C.MODEL_2D.TYPE = ''
_C.MODEL_2D.CKPT_PATH = ''
_C.MODEL_2D.NUM_CLASSES = 5
_C.MODEL_2D.DUAL_HEAD = False
# ---------------------------------------------------------------------------- #
# UNetResNet34 options
# ---------------------------------------------------------------------------- #
_C.MODEL_2D.UNetResNet34 = CN()
_C.MODEL_2D.UNetResNet34.pretrained = True

# ---------------------------------------------------------------------------- #
# Model 3D
# ---------------------------------------------------------------------------- #
_C.MODEL_3D = CN()
_C.MODEL_3D.TYPE = ''
_C.MODEL_3D.CKPT_PATH = ''
_C.MODEL_3D.NUM_CLASSES = 5
_C.MODEL_3D.DUAL_HEAD = False
# ----------------------------------------------------------------------------- #
# SCN options
# ----------------------------------------------------------------------------- #
_C.MODEL_3D.SCN = CN()
_C.MODEL_3D.SCN.in_channels = 1
_C.MODEL_3D.SCN.m = 16  # number of unet features (multiplied in each layer)
_C.MODEL_3D.SCN.block_reps = 1  # block repetitions
_C.MODEL_3D.SCN.residual_blocks = False  # ResNet style basic blocks
_C.MODEL_3D.SCN.full_scale = 4096
_C.MODEL_3D.SCN.num_planes = 7

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# @ will be replaced by config path
_C.OUTPUT_DIR = osp.expanduser('~/workspace/outputs/xmuda/@')

================================================
FILE: xmuda/data/a2d2/a2d2_dataloader.py
================================================
import os.path as osp
import pickle
from PIL import Image
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms as T
import json

from xmuda.data.utils.augmentation_3d import augment_and_scale_3d


class A2D2Base(Dataset):
    """A2D2 dataset"""

    class_names = [
        'Car 1', 'Car 2', 'Car 3', 'Car 4',
        'Bicycle 1', 'Bicycle 2', 'Bicycle 3', 'Bicycle 4',
        'Pedestrian 1', 'Pedestrian 2', 'Pedestrian 3',
        'Truck 1', 'Truck 2', 'Truck 3',
        'Small vehicles 1', 'Small vehicles 2', 'Small vehicles 3',
        'Traffic signal 1', 'Traffic signal 2', 'Traffic signal 3',
        'Traffic sign 1', 'Traffic sign 2', 'Traffic sign 3',
        'Utility vehicle 1', 'Utility vehicle 2',
        'Sidebars', 'Speed bumper', 'Curbstone', 'Solid line', 'Irrelevant signs',
        'Road blocks', 'Tractor', 'Non-drivable street',
crossing', 'Obstacles / trash', 'Poles', 'RD restricted area', 'Animals', 'Grid structure', 'Signal corpus', 'Drivable cobblestone', 'Electronic traffic', 'Slow drive area', 'Nature object', 'Parking area', 'Sidewalk', 'Ego car', 'Painted driv. instr.', 'Traffic guide obj.', 'Dashed line', 'RD normal street', 'Sky', 'Buildings', 'Blurred area', 'Rain dirt' ] # use those categories if merge_classes == True categories = { 'car': ['Car 1', 'Car 2', 'Car 3', 'Car 4', 'Ego car'], 'truck': ['Truck 1', 'Truck 2', 'Truck 3'], 'bike': ['Bicycle 1', 'Bicycle 2', 'Bicycle 3', 'Bicycle 4', 'Small vehicles 1', 'Small vehicles 2', 'Small vehicles 3'], # small vehicles are "usually" motorcycles 'person': ['Pedestrian 1', 'Pedestrian 2', 'Pedestrian 3'], 'road': ['RD normal street', 'Zebra crossing', 'Solid line', 'RD restricted area', 'Slow drive area', 'Drivable cobblestone', 'Dashed line', 'Painted driv. instr.'], 'parking': ['Parking area'], 'sidewalk': ['Sidewalk', 'Curbstone'], 'building': ['Buildings'], 'nature': ['Nature object'], 'other-objects': ['Poles', 'Traffic signal 1', 'Traffic signal 2', 'Traffic signal 3', 'Traffic sign 1', 'Traffic sign 2', 'Traffic sign 3', 'Sidebars', 'Speed bumper', 'Irrelevant signs', 'Road blocks', 'Obstacles / trash', 'Animals', 'Signal corpus', 'Electronic traffic', 'Traffic guide obj.', 'Grid structure'], # 'ignore': ['Sky', 'Utility vehicle 1', 'Utility vehicle 2', 'Tractor', 'Non-drivable street', # 'Blurred area', 'Rain dirt'], } def __init__(self, split, preprocess_dir, merge_classes=False ): self.split = split self.preprocess_dir = preprocess_dir print("Initialize A2D2 dataloader") with open(osp.join(self.preprocess_dir, 'cams_lidars.json'), 'r') as f: self.config = json.load(f) assert isinstance(split, tuple) print('Load', split) self.data = [] for curr_split in split: with open(osp.join(self.preprocess_dir, 'preprocess', curr_split + '.pkl'), 'rb') as f: self.data.extend(pickle.load(f)) with open(osp.join(self.preprocess_dir, 'class_list.json'), 'r') as f: class_list = json.load(f) self.rgb_to_class = {} self.rgb_to_cls_idx = {} count = 0 for k, v in class_list.items(): # hex to rgb rgb_value = tuple(int(k.lstrip('#')[i:i + 2], 16) for i in (0, 2, 4)) self.rgb_to_class[rgb_value] = v self.rgb_to_cls_idx[rgb_value] = count count += 1 assert self.class_names == list(self.rgb_to_class.values()) if merge_classes: self.label_mapping = -100 * np.ones(len(self.rgb_to_class) + 1, dtype=int) for cat_idx, cat_list in enumerate(self.categories.values()): for class_name in cat_list: self.label_mapping[self.class_names.index(class_name)] = cat_idx self.class_names = list(self.categories.keys()) else: self.label_mapping = None def __getitem__(self, index): raise NotImplementedError def __len__(self): return len(self.data) class A2D2SCN(A2D2Base): def __init__(self, split, preprocess_dir, merge_classes=False, scale=20, full_scale=4096, use_image=False, resize=(480, 302), image_normalizer=None, noisy_rot=0.0, # 3D augmentation flip_y=0.0, # 3D augmentation rot_z=0.0, # 3D augmentation transl=False, # 3D augmentation fliplr=0.0, # 2D augmentation color_jitter=None, # 2D augmentation ): super().__init__(split, preprocess_dir, merge_classes=merge_classes) # point cloud parameters self.scale = scale self.full_scale = full_scale # 3D augmentation self.noisy_rot = noisy_rot self.flip_y = flip_y self.rot_z = rot_z self.transl = transl # image parameters self.use_image = use_image if self.use_image: self.resize = resize self.image_normalizer = image_normalizer # data 
augmentation self.fliplr = fliplr self.color_jitter = T.ColorJitter(*color_jitter) if color_jitter else None def __getitem__(self, index): data_dict = self.data[index] points = data_dict['points'].copy() seg_label = data_dict['seg_labels'].astype(np.int64) if self.label_mapping is not None: seg_label = self.label_mapping[seg_label] out_dict = {} if self.use_image: points_img = data_dict['points_img'].copy() img_path = osp.join(self.preprocess_dir, data_dict['camera_path']) image = Image.open(img_path) if self.resize: if not image.size == self.resize: # check if we do not enlarge downsized images assert image.size[0] > self.resize[0] # scale image points points_img[:, 0] = float(self.resize[1]) / image.size[1] * np.floor(points_img[:, 0]) points_img[:, 1] = float(self.resize[0]) / image.size[0] * np.floor(points_img[:, 1]) # resize image image = image.resize(self.resize, Image.BILINEAR) img_indices = points_img.astype(np.int64) assert np.all(img_indices[:, 0] >= 0) assert np.all(img_indices[:, 1] >= 0) assert np.all(img_indices[:, 0] < image.size[1]) assert np.all(img_indices[:, 1] < image.size[0]) # 2D augmentation if self.color_jitter is not None: image = self.color_jitter(image) # PIL to numpy image = np.array(image, dtype=np.float32, copy=False) / 255. # 2D augmentation if np.random.rand() < self.fliplr: image = np.ascontiguousarray(np.fliplr(image)) img_indices[:, 1] = image.shape[1] - 1 - img_indices[:, 1] # normalize image if self.image_normalizer: mean, std = self.image_normalizer mean = np.asarray(mean, dtype=np.float32) std = np.asarray(std, dtype=np.float32) image = (image - mean) / std out_dict['img'] = np.moveaxis(image, -1, 0) out_dict['img_indices'] = img_indices # 3D data augmentation and scaling from points to voxel indices # A2D2 lidar coordinates (same as Kitti): x (front), y (left), z (up) coords = augment_and_scale_3d(points, self.scale, self.full_scale, noisy_rot=self.noisy_rot, flip_y=self.flip_y, rot_z=self.rot_z, transl=self.transl) # cast to integer coords = coords.astype(np.int64) # only use voxels inside receptive field idxs = (coords.min(1) >= 0) * (coords.max(1) < self.full_scale) out_dict['coords'] = coords[idxs] out_dict['feats'] = np.ones([len(idxs), 1], np.float32) # simply use 1 as feature out_dict['seg_label'] = seg_label[idxs] if self.use_image: out_dict['img_indices'] = out_dict['img_indices'][idxs] return out_dict def test_A2D2SCN(): from xmuda.data.utils.visualize import draw_points_image_labels, draw_bird_eye_view preprocess_dir = '/datasets_local/datasets_mjaritz/a2d2_preprocess' split = ('test',) dataset = A2D2SCN(split=split, preprocess_dir=preprocess_dir, merge_classes=True, use_image=True, noisy_rot=0.1, flip_y=0.5, rot_z=2*np.pi, transl=True, fliplr=0.5, color_jitter=(0.4, 0.4, 0.4) ) for i in [10, 20, 30, 40, 50, 60]: data = dataset[i] coords = data['coords'] seg_label = data['seg_label'] img = np.moveaxis(data['img'], 0, 2) img_indices = data['img_indices'] draw_points_image_labels(img, img_indices, seg_label, color_palette_type='SemanticKITTI', point_size=3) draw_bird_eye_view(coords) def compute_class_weights(): preprocess_dir = '/datasets_local/datasets_mjaritz/a2d2_preprocess' split = ('train', 'test') dataset = A2D2Base(split, preprocess_dir, merge_classes=True ) # compute points per class over whole dataset num_classes = len(dataset.class_names) points_per_class = np.zeros(num_classes, int) for i, data in enumerate(dataset.data): print('{}/{}'.format(i, len(dataset))) labels = dataset.label_mapping[data['seg_labels']] points_per_class 
+= np.bincount(labels[labels != -100], minlength=num_classes) # compute log smoothed class weights class_weights = np.log(5 * points_per_class.sum() / points_per_class) print('log smoothed class weights: ', class_weights / class_weights.min()) if __name__ == '__main__': test_A2D2SCN() # compute_class_weights() ================================================ FILE: xmuda/data/a2d2/preprocess.py ================================================ import os import os.path as osp import shutil import numpy as np import pickle import json from PIL import Image import cv2 import glob import torch from torch.utils.data import Dataset from torch.utils.data.dataloader import DataLoader from xmuda.data.a2d2 import splits from xmuda.data.a2d2.a2d2_dataloader import A2D2Base # prevent "RuntimeError: received 0 items of ancdata" torch.multiprocessing.set_sharing_strategy('file_system') class_names_to_id = dict(zip(A2D2Base.class_names, range(len(A2D2Base.class_names)))) def undistort_image(config, image, cam_name): """copied from https://www.a2d2.audi/a2d2/en/tutorial.html""" if cam_name in ['front_left', 'front_center', 'front_right', 'side_left', 'side_right', 'rear_center']: # get parameters from config file intr_mat_undist = np.asarray(config['cameras'][cam_name]['CamMatrix']) intr_mat_dist = np.asarray(config['cameras'][cam_name]['CamMatrixOriginal']) dist_parms = np.asarray(config['cameras'][cam_name]['Distortion']) lens = config['cameras'][cam_name]['Lens'] if lens == 'Fisheye': return cv2.fisheye.undistortImage(image, intr_mat_dist, D=dist_parms, Knew=intr_mat_undist) elif lens == 'Telecam': return cv2.undistort(image, intr_mat_dist, distCoeffs=dist_parms, newCameraMatrix=intr_mat_undist) else: return image else: return image class DummyDataset(Dataset): """Use torch dataloader for multiprocessing""" def __init__(self, root_dir, scenes): self.class_names = A2D2Base.class_names.copy() self.categories = A2D2Base.categories.copy() self.root_dir = root_dir self.data = [] self.glob_frames(scenes) # load config with open(osp.join(root_dir, 'cams_lidars.json'), 'r') as f: self.config = json.load(f) # load color to class mapping with open(osp.join(root_dir, 'class_list.json'), 'r') as f: class_list = json.load(f) self.rgb_to_class = {} self.rgb_to_cls_idx = {} count = 0 for k, v in class_list.items(): # hex to rgb rgb_value = tuple(int(k.lstrip('#')[i:i + 2], 16) for i in (0, 2, 4)) self.rgb_to_class[rgb_value] = v self.rgb_to_cls_idx[rgb_value] = count count += 1 assert list(class_names_to_id.keys()) == list(self.rgb_to_class.values()) def glob_frames(self, scenes): for scene in scenes: cam_paths = sorted(glob.glob(osp.join(self.root_dir, scene, 'camera', 'cam_front_center', '*.png'))) for cam_path in cam_paths: basename = osp.basename(cam_path) datetime = basename[:14] assert datetime.isdigit() frame_id = basename[-13:-4] assert frame_id.isdigit() data = { 'camera_path': cam_path, 'lidar_path': osp.join(self.root_dir, scene, 'lidar', 'cam_front_center', datetime + '_lidar_frontcenter_' + frame_id + '.npz'), 'label_path': osp.join(self.root_dir, scene, 'label', 'cam_front_center', datetime + '_label_frontcenter_' + frame_id + '.png'), } for k, v in data.items(): if not osp.exists(v): raise IOError('File not found {}'.format(v)) self.data.append(data) def __getitem__(self, index): data_dict = self.data[index].copy() lidar_front_center = np.load(data_dict['lidar_path']) points = lidar_front_center['points'] if 'row' not in lidar_front_center.keys(): print('row not in lidar dict, return None, 
{}'.format(data_dict['lidar_path'])) return {} rows = lidar_front_center['row'].astype(np.int) cols = lidar_front_center['col'].astype(np.int) # extract 3D labels from 2D label_img = np.array(Image.open(data_dict['label_path'])) label_img = undistort_image(self.config, label_img, 'front_center') label_pc = label_img[rows, cols, :] seg_label = np.full(label_pc.shape[0], fill_value=len(self.rgb_to_cls_idx), dtype=np.int64) # map RGB label code to index for rgb_values, cls_idx in self.rgb_to_cls_idx.items(): idx = (rgb_values == label_pc).all(1) if idx.any(): seg_label[idx] = cls_idx # load image image = Image.open(data_dict['camera_path']) image_size = image.size assert image_size == (1920, 1208) # undistort image = undistort_image(self.config, np.array(image), 'front_center') # scale image points points_img = np.stack([lidar_front_center['row'], lidar_front_center['col']], 1).astype(np.float32) # check if conversion from float64 to float32 has led to image points outside of image assert np.all(points_img[:, 0] < image_size[1]) assert np.all(points_img[:, 1] < image_size[0]) data_dict['seg_label'] = seg_label.astype(np.uint8) data_dict['points'] = points.astype(np.float32) data_dict['points_img'] = points_img # row, col format, shape: (num_points, 2) data_dict['img'] = image return data_dict def __len__(self): return len(self.data) def preprocess(split_name, root_dir, out_dir): pkl_data = [] split = getattr(splits, split_name) dataloader = DataLoader(DummyDataset(root_dir, split), num_workers=8) num_skips = 0 for i, data_dict in enumerate(dataloader): # data error leads to returning empty dict if not data_dict: print('empty dict, continue') num_skips += 1 continue for k, v in data_dict.items(): data_dict[k] = v[0] print('{}/{} {}'.format(i, len(dataloader), data_dict['lidar_path'])) # convert to relative path lidar_path = data_dict['lidar_path'].replace(root_dir + '/', '') cam_path = data_dict['camera_path'].replace(root_dir + '/', '') # save undistorted image new_cam_path = osp.join(out_dir, cam_path) os.makedirs(osp.dirname(new_cam_path), exist_ok=True) image = Image.fromarray(data_dict['img'].numpy()) image.save(new_cam_path) # append data out_dict = { 'points': data_dict['points'].numpy(), 'seg_labels': data_dict['seg_label'].numpy(), 'points_img': data_dict['points_img'].numpy(), # row, col format, shape: (num_points, 2) 'lidar_path': lidar_path, 'camera_path': cam_path, } pkl_data.append(out_dict) print('Skipped {} files'.format(num_skips)) # save to pickle file save_dir = osp.join(out_dir, 'preprocess') os.makedirs(save_dir, exist_ok=True) save_path = osp.join(save_dir, '{}.pkl'.format(split_name)) with open(save_path, 'wb') as f: pickle.dump(pkl_data, f) print('Wrote preprocessed data to ' + save_path) if __name__ == '__main__': root_dir = '/datasets_master/a2d2' out_dir = '/datasets_local/datasets_mjaritz/a2d2_preprocess' preprocess('test', root_dir, out_dir) # split into train1 and train2 to prevent segmentation fault in torch dataloader preprocess('train1', root_dir, out_dir) preprocess('train2', root_dir, out_dir) # merge train1 and train2 data = [] for curr_split in ['train1', 'train2']: with open(osp.join(out_dir, 'preprocess', curr_split + '.pkl'), 'rb') as f: data.extend(pickle.load(f)) save_path = osp.join(out_dir, 'preprocess', 'train.pkl') with open(save_path, 'wb') as f: pickle.dump(data, f) print('Wrote preprocessed data to ' + save_path) for curr_split in ['train1', 'train2']: os.remove(osp.join(out_dir, 'preprocess', curr_split + '.pkl')) # copy cams_lidars.json and 
class_list.json to out_dir for filename in ['cams_lidars.json', 'class_list.json']: shutil.copyfile(osp.join(root_dir, filename), osp.join(out_dir, filename)) ================================================ FILE: xmuda/data/a2d2/splits.py ================================================ train = [ '20180810_142822', '20180925_101535', '20180925_112730', '20180925_124435', '20180925_135056', '20181008_095521', '20181016_082154', '20181016_125231', '20181107_132300', '20181107_132730', '20181107_133258', '20181107_133445', '20181108_084007', '20181108_091945', '20181108_103155', '20181108_123750', '20181108_141609', '20181204_135952', '20181204_154421', '20181204_170238', ] train1 = [ '20180810_142822', '20180925_101535', '20180925_112730', '20180925_124435', '20180925_135056', '20181008_095521', '20181016_082154', '20181016_125231', '20181107_132300', '20181107_132730', ] train2 = [ '20181107_133258', '20181107_133445', '20181108_084007', '20181108_091945', '20181108_103155', '20181108_123750', '20181108_141609', '20181204_135952', '20181204_154421', '20181204_170238', ] test = [ '20180807_145028' ] all = [ '20180807_145028', '20180810_142822', '20180925_101535', '20180925_112730', '20180925_124435', '20180925_135056', '20181008_095521', '20181016_082154', # '20181016_095036', # no lidar '20181016_125231', '20181107_132300', '20181107_132730', '20181107_133258', '20181107_133445', '20181108_084007', '20181108_091945', '20181108_103155', '20181108_123750', '20181108_141609', '20181204_135952', '20181204_154421', '20181204_170238', # '20181204_191844', # no lidar ] ================================================ FILE: xmuda/data/build.py ================================================ from torch.utils.data.sampler import RandomSampler, BatchSampler from torch.utils.data.dataloader import DataLoader, default_collate from yacs.config import CfgNode as CN from xmuda.common.utils.torch_util import worker_init_fn from xmuda.data.collate import get_collate_scn from xmuda.common.utils.sampler import IterationBasedBatchSampler from xmuda.data.nuscenes.nuscenes_dataloader import NuScenesSCN from xmuda.data.a2d2.a2d2_dataloader import A2D2SCN from xmuda.data.semantic_kitti.semantic_kitti_dataloader import SemanticKITTISCN def build_dataloader(cfg, mode='train', domain='source', start_iteration=0, halve_batch_size=False): assert mode in ['train', 'val', 'test', 'train_labeled', 'train_unlabeled'] dataset_cfg = cfg.get('DATASET_' + domain.upper()) split = dataset_cfg[mode.upper()] is_train = 'train' in mode batch_size = cfg['TRAIN'].BATCH_SIZE if is_train else cfg['VAL'].BATCH_SIZE if halve_batch_size: batch_size = batch_size // 2 # build dataset # Make a copy of dataset_kwargs so that we can pop augmentation afterwards without destroying the cfg. # Note that the build_dataloader fn is called twice for train and val. 
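# Illustration of the copy-then-pop pattern used just below (a standalone sketch,
# not code from this function): CN(...) deep-copies the node, so popping
# 'augmentation' from the copy leaves the original cfg untouched.
#
#   node = CN({'augmentation': CN({'fliplr': 0.5}), 'scale': 20})
#   kwargs = CN(node)
#   aug = kwargs.pop('augmentation')  # removed from the copy only
#   assert 'augmentation' in node     # original is intact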
dataset_kwargs = CN(dataset_cfg.get(dataset_cfg.TYPE, dict())) if 'SCN' in cfg.MODEL_3D.keys(): assert dataset_kwargs.full_scale == cfg.MODEL_3D.SCN.full_scale augmentation = dataset_kwargs.pop('augmentation') augmentation = augmentation if is_train else dict() # use pselab_paths only when training on target if domain == 'target' and not is_train: dataset_kwargs.pop('pselab_paths') if dataset_cfg.TYPE == 'NuScenesSCN': dataset = NuScenesSCN(split=split, output_orig=not is_train, **dataset_kwargs, **augmentation) elif dataset_cfg.TYPE == 'A2D2SCN': dataset = A2D2SCN(split=split, **dataset_kwargs, **augmentation) elif dataset_cfg.TYPE == 'SemanticKITTISCN': dataset = SemanticKITTISCN(split=split, output_orig=not is_train, **dataset_kwargs, **augmentation) else: raise ValueError('Unsupported type of dataset: {}.'.format(dataset_cfg.TYPE)) if 'SCN' in dataset_cfg.TYPE: collate_fn = get_collate_scn(is_train) else: collate_fn = default_collate if is_train: sampler = RandomSampler(dataset) batch_sampler = BatchSampler(sampler, batch_size=batch_size, drop_last=cfg.DATALOADER.DROP_LAST) batch_sampler = IterationBasedBatchSampler(batch_sampler, cfg.SCHEDULER.MAX_ITERATION, start_iteration) dataloader = DataLoader( dataset, batch_sampler=batch_sampler, num_workers=cfg.DATALOADER.NUM_WORKERS, worker_init_fn=worker_init_fn, collate_fn=collate_fn ) else: dataloader = DataLoader( dataset, batch_size=batch_size, drop_last=False, num_workers=cfg.DATALOADER.NUM_WORKERS, worker_init_fn=worker_init_fn, collate_fn=collate_fn ) return dataloader ================================================ FILE: xmuda/data/collate.py ================================================ import torch from functools import partial def collate_scn_base(input_dict_list, output_orig, output_image=True): """ Custom collate function for SCN. The batch size is always 1, but the batch indices are appended to the locations. 
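    For intuition (illustrative shapes only): collating two samples whose
    'coords' arrays have shapes (2, 3) and (3, 3) yields out_dict['x'][0]
    of shape (5, 4), where the appended fourth column holds the batch index
    (0 for the first sample, 1 for the second), the layout sparseconvnet
    expects for batched coordinates.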
:param input_dict_list: a list of dicts from the dataloader :param output_orig: whether to output original point cloud/labels/indices :param output_image: whether to output images :return: Collated data batch as dict """ locs=[] feats=[] labels=[] if output_image: imgs = [] img_idxs = [] if output_orig: orig_seg_label = [] orig_points_idx = [] output_pselab = 'pseudo_label_2d' in input_dict_list[0].keys() if output_pselab: pseudo_label_2d = [] pseudo_label_3d = [] for idx, input_dict in enumerate(input_dict_list): coords = torch.from_numpy(input_dict['coords']) batch_idxs = torch.LongTensor(coords.shape[0], 1).fill_(idx) locs.append(torch.cat([coords, batch_idxs], 1)) feats.append(torch.from_numpy(input_dict['feats'])) if 'seg_label' in input_dict.keys(): labels.append(torch.from_numpy(input_dict['seg_label'])) if output_image: imgs.append(torch.from_numpy(input_dict['img'])) img_idxs.append(input_dict['img_indices']) if output_orig: orig_seg_label.append(input_dict['orig_seg_label']) orig_points_idx.append(input_dict['orig_points_idx']) if output_pselab: pseudo_label_2d.append(torch.from_numpy(input_dict['pseudo_label_2d'])) if input_dict['pseudo_label_3d'] is not None: pseudo_label_3d.append(torch.from_numpy(input_dict['pseudo_label_3d'])) locs = torch.cat(locs, 0) feats = torch.cat(feats, 0) out_dict = {'x': [locs, feats]} if labels: labels = torch.cat(labels, 0) out_dict['seg_label'] = labels if output_image: out_dict['img'] = torch.stack(imgs) out_dict['img_indices'] = img_idxs if output_orig: out_dict['orig_seg_label'] = orig_seg_label out_dict['orig_points_idx'] = orig_points_idx if output_pselab: out_dict['pseudo_label_2d'] = torch.cat(pseudo_label_2d, 0) out_dict['pseudo_label_3d'] = torch.cat(pseudo_label_3d, 0) if pseudo_label_3d else pseudo_label_3d return out_dict def get_collate_scn(is_train): return partial(collate_scn_base, output_orig=not is_train, ) ================================================ FILE: xmuda/data/nuscenes/nuscenes_dataloader.py ================================================ import os.path as osp import pickle from PIL import Image import numpy as np from torch.utils.data import Dataset from torchvision import transforms as T from xmuda.data.utils.refine_pseudo_labels import refine_pseudo_labels from xmuda.data.utils.augmentation_3d import augment_and_scale_3d class NuScenesBase(Dataset): """NuScenes dataset""" class_names = [ "car", "truck", "bus", "trailer", "construction_vehicle", "pedestrian", "motorcycle", "bicycle", "traffic_cone", "barrier", "background", ] # use those categories if merge_classes == True categories = { "vehicle": ["car", "truck", "bus", "trailer", "construction_vehicle"], "pedestrian": ["pedestrian"], "bike": ["motorcycle", "bicycle"], "traffic_boundary": ["traffic_cone", "barrier"], "background": ["background"] } def __init__(self, split, preprocess_dir, merge_classes=False, pselab_paths=None ): self.split = split self.preprocess_dir = preprocess_dir print("Initialize Nuscenes dataloader") assert isinstance(split, tuple) print('Load', split) self.data = [] for curr_split in split: with open(osp.join(self.preprocess_dir, curr_split + '.pkl'), 'rb') as f: self.data.extend(pickle.load(f)) self.pselab_data = None if pselab_paths: assert isinstance(pselab_paths, tuple) print('Load pseudo label data ', pselab_paths) self.pselab_data = [] for curr_split in pselab_paths: self.pselab_data.extend(np.load(curr_split, allow_pickle=True)) # check consistency of data and pseudo labels assert len(self.pselab_data) == len(self.data) for i in 
range(len(self.pselab_data)): assert len(self.pselab_data[i]['pseudo_label_2d']) == len(self.data[i]['seg_labels']) # refine 2d pseudo labels probs2d = np.concatenate([data['probs_2d'] for data in self.pselab_data]) pseudo_label_2d = np.concatenate([data['pseudo_label_2d'] for data in self.pselab_data]).astype(np.int) pseudo_label_2d = refine_pseudo_labels(probs2d, pseudo_label_2d) # refine 3d pseudo labels # fusion model has only one final prediction saved in probs_2d if 'probs_3d' in self.pselab_data[0].keys(): probs3d = np.concatenate([data['probs_3d'] for data in self.pselab_data]) pseudo_label_3d = np.concatenate([data['pseudo_label_3d'] for data in self.pselab_data]).astype(np.int) pseudo_label_3d = refine_pseudo_labels(probs3d, pseudo_label_3d) else: pseudo_label_3d = None # undo concat left_idx = 0 for data_idx in range(len(self.pselab_data)): right_idx = left_idx + len(self.pselab_data[data_idx]['probs_2d']) self.pselab_data[data_idx]['pseudo_label_2d'] = pseudo_label_2d[left_idx:right_idx] if pseudo_label_3d is not None: self.pselab_data[data_idx]['pseudo_label_3d'] = pseudo_label_3d[left_idx:right_idx] else: self.pselab_data[data_idx]['pseudo_label_3d'] = None left_idx = right_idx if merge_classes: self.label_mapping = -100 * np.ones(len(self.class_names), dtype=int) for cat_idx, cat_list in enumerate(self.categories.values()): for class_name in cat_list: self.label_mapping[self.class_names.index(class_name)] = cat_idx self.class_names = list(self.categories.keys()) else: self.label_mapping = None def __getitem__(self, index): raise NotImplementedError def __len__(self): return len(self.data) class NuScenesSCN(NuScenesBase): def __init__(self, split, preprocess_dir, nuscenes_dir='', pselab_paths=None, merge_classes=False, scale=20, full_scale=4096, use_image=False, resize=(400, 225), image_normalizer=None, noisy_rot=0.0, # 3D augmentation flip_x=0.0, # 3D augmentation rot_z=0.0, # 3D augmentation transl=False, # 3D augmentation fliplr=0.0, # 2D augmentation color_jitter=None, # 2D augmentation output_orig=False ): super().__init__(split, preprocess_dir, merge_classes=merge_classes, pselab_paths=pselab_paths) self.nuscenes_dir = nuscenes_dir self.output_orig = output_orig # point cloud parameters self.scale = scale self.full_scale = full_scale # 3D augmentation self.noisy_rot = noisy_rot self.flip_x = flip_x self.rot_z = rot_z self.transl = transl # image parameters self.use_image = use_image if self.use_image: self.resize = resize self.image_normalizer = image_normalizer # data augmentation self.fliplr = fliplr self.color_jitter = T.ColorJitter(*color_jitter) if color_jitter else None def __getitem__(self, index): data_dict = self.data[index] points = data_dict['points'].copy() seg_label = data_dict['seg_labels'].astype(np.int64) if self.label_mapping is not None: seg_label = self.label_mapping[seg_label] out_dict = {} keep_idx = np.ones(len(points), dtype=np.bool) if self.use_image: points_img = data_dict['points_img'].copy() img_path = osp.join(self.nuscenes_dir, data_dict['camera_path']) image = Image.open(img_path) if self.resize: if not image.size == self.resize: # check if we do not enlarge downsized images assert image.size[0] > self.resize[0] # scale image points points_img[:, 0] = float(self.resize[1]) / image.size[1] * np.floor(points_img[:, 0]) points_img[:, 1] = float(self.resize[0]) / image.size[0] * np.floor(points_img[:, 1]) # resize image image = image.resize(self.resize, Image.BILINEAR) img_indices = points_img.astype(np.int64) assert np.all(img_indices[:, 0] >= 
0) assert np.all(img_indices[:, 1] >= 0) assert np.all(img_indices[:, 0] < image.size[1]) assert np.all(img_indices[:, 1] < image.size[0]) # 2D augmentation if self.color_jitter is not None: image = self.color_jitter(image) # PIL to numpy image = np.array(image, dtype=np.float32, copy=False) / 255. # 2D augmentation if np.random.rand() < self.fliplr: image = np.ascontiguousarray(np.fliplr(image)) img_indices[:, 1] = image.shape[1] - 1 - img_indices[:, 1] # normalize image if self.image_normalizer: mean, std = self.image_normalizer mean = np.asarray(mean, dtype=np.float32) std = np.asarray(std, dtype=np.float32) image = (image - mean) / std out_dict['img'] = np.moveaxis(image, -1, 0) out_dict['img_indices'] = img_indices # 3D data augmentation and scaling from points to voxel indices # nuscenes lidar coordinates: x (right), y (front), z (up) coords = augment_and_scale_3d(points, self.scale, self.full_scale, noisy_rot=self.noisy_rot, flip_x=self.flip_x, rot_z=self.rot_z, transl=self.transl) # cast to integer coords = coords.astype(np.int64) # only use voxels inside receptive field idxs = (coords.min(1) >= 0) * (coords.max(1) < self.full_scale) out_dict['coords'] = coords[idxs] out_dict['feats'] = np.ones([len(idxs), 1], np.float32) # simply use 1 as feature out_dict['seg_label'] = seg_label[idxs] if self.use_image: out_dict['img_indices'] = out_dict['img_indices'][idxs] if self.pselab_data is not None: out_dict.update({ 'pseudo_label_2d': self.pselab_data[index]['pseudo_label_2d'][keep_idx][idxs], 'pseudo_label_3d': self.pselab_data[index]['pseudo_label_3d'][keep_idx][idxs] }) if self.output_orig: out_dict.update({ 'orig_seg_label': seg_label, 'orig_points_idx': idxs, }) return out_dict def test_NuScenesSCN(): from xmuda.data.utils.visualize import draw_points_image_labels, draw_points_image_depth, draw_bird_eye_view preprocess_dir = '/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess' nuscenes_dir = '/datasets_local/datasets_mjaritz/nuscenes_preprocess' # split = ('train_singapore',) # pselab_paths = ('/home/docker_user/workspace/outputs/xmuda/nuscenes/usa_singapore/xmuda/pselab_data/train_singapore.npy',) split = ('train_night',) # pselab_paths = ('/home/docker_user/workspace/outputs/xmuda/nuscenes/day_night/xmuda/pselab_data/train_night.npy',) dataset = NuScenesSCN(split=split, preprocess_dir=preprocess_dir, nuscenes_dir=nuscenes_dir, # pselab_paths=pselab_paths, merge_classes=True, use_image=True, noisy_rot=0.1, flip_x=0.5, rot_z=2*np.pi, transl=True, fliplr=0.5, color_jitter=(0.4, 0.4, 0.4) ) for i in [10, 20, 30, 40, 50, 60]: data = dataset[i] coords = data['coords'] seg_label = data['seg_label'] img = np.moveaxis(data['img'], 0, 2) img_indices = data['img_indices'] draw_points_image_labels(img, img_indices, seg_label, color_palette_type='NuScenes', point_size=3) # pseudo_label_2d = data['pseudo_label_2d'] # draw_points_image_labels(img, img_indices, pseudo_label_2d, color_palette_type='NuScenes', point_size=3) draw_bird_eye_view(coords) print('Number of points:', len(coords)) def compute_class_weights(): preprocess_dir = '/datasets_local/datasets_mjaritz/nuscenes_preprocess/preprocess' # split = ('train_usa', 'test_usa') split = ('train_day', 'test_day') dataset = NuScenesBase(split, preprocess_dir, merge_classes=True ) # compute points per class over whole dataset num_classes = len(dataset.class_names) points_per_class = np.zeros(num_classes, int) for i, data in enumerate(dataset.data): print('{}/{}'.format(i, len(dataset))) points_per_class += 
np.bincount(dataset.label_mapping[data['seg_labels']], minlength=num_classes) # compute log smoothed class weights class_weights = np.log(5 * points_per_class.sum() / points_per_class) print('log smoothed class weights: ', class_weights / class_weights.min()) if __name__ == '__main__': test_NuScenesSCN() # compute_class_weights() ================================================ FILE: xmuda/data/nuscenes/preprocess.py ================================================ import os import os.path as osp import numpy as np import pickle from nuscenes.nuscenes import NuScenes from nuscenes.utils.geometry_utils import points_in_box from nuscenes.eval.detection.utils import category_to_detection_name from xmuda.data.nuscenes.nuscenes_dataloader import NuScenesBase from xmuda.data.nuscenes.projection import map_pointcloud_to_image from xmuda.data.nuscenes import splits class_names_to_id = dict(zip(NuScenesBase.class_names, range(len(NuScenesBase.class_names)))) if 'background' in class_names_to_id: del class_names_to_id['background'] def preprocess(nusc, split_names, root_dir, out_dir, keyword=None, keyword_action=None, subset_name=None, location=None): # cannot process day/night and location at the same time assert not (bool(keyword) and bool(location)) if keyword: assert keyword_action in ['filter', 'exclude'] # init dict to save pkl_dict = {} for split_name in split_names: pkl_dict[split_name] = [] for i, sample in enumerate(nusc.sample): curr_scene_name = nusc.get('scene', sample['scene_token'])['name'] # get if the current scene is in train, val or test curr_split = None for split_name in split_names: if curr_scene_name in getattr(splits, split_name): curr_split = split_name break if curr_split is None: continue if subset_name == 'night': if curr_split == 'train': if curr_scene_name in splits.val_night: curr_split = 'val' if subset_name == 'singapore': if curr_split == 'train': if curr_scene_name in splits.val_singapore: curr_split = 'val' # filter for day/night if keyword: scene_description = nusc.get("scene", sample["scene_token"])["description"] if keyword.lower() in scene_description.lower(): if keyword_action == 'exclude': # skip sample continue else: if keyword_action == 'filter': # skip sample continue if location: scene = nusc.get("scene", sample["scene_token"]) if location not in nusc.get("log", scene['log_token'])['location']: continue lidar_token = sample["data"]["LIDAR_TOP"] cam_front_token = sample["data"]["CAM_FRONT"] lidar_path, boxes_lidar, _ = nusc.get_sample_data(lidar_token) cam_path, boxes_front_cam, cam_intrinsic = nusc.get_sample_data(cam_front_token) print('{}/{} {} {}'.format(i + 1, len(nusc.sample), curr_scene_name, lidar_path)) sd_rec_lidar = nusc.get('sample_data', sample['data']["LIDAR_TOP"]) cs_record_lidar = nusc.get('calibrated_sensor', sd_rec_lidar['calibrated_sensor_token']) pose_record_lidar = nusc.get('ego_pose', sd_rec_lidar['ego_pose_token']) sd_rec_cam = nusc.get('sample_data', sample['data']["CAM_FRONT"]) cs_record_cam = nusc.get('calibrated_sensor', sd_rec_cam['calibrated_sensor_token']) pose_record_cam = nusc.get('ego_pose', sd_rec_cam['ego_pose_token']) calib_infos = { "lidar2ego_translation": cs_record_lidar['translation'], "lidar2ego_rotation": cs_record_lidar['rotation'], "ego2global_translation_lidar": pose_record_lidar['translation'], "ego2global_rotation_lidar": pose_record_lidar['rotation'], "ego2global_translation_cam": pose_record_cam['translation'], "ego2global_rotation_cam": pose_record_cam['rotation'], "cam2ego_translation": 
cs_record_cam['translation'], "cam2ego_rotation": cs_record_cam['rotation'], "cam_intrinsic": cam_intrinsic, } # load lidar points pts = np.fromfile(lidar_path, dtype=np.float32, count=-1).reshape([-1, 5])[:, :3].T # map point cloud into front camera image pts_valid_flag, pts_cam_coord, pts_img = map_pointcloud_to_image(pts, (900, 1600, 3), calib_infos) # fliplr so that indexing is row, col and not col, row pts_img = np.ascontiguousarray(np.fliplr(pts_img)) # only use lidar points in the front camera image pts = pts[:, pts_valid_flag] num_pts = pts.shape[1] seg_labels = np.full(num_pts, fill_value=len(class_names_to_id), dtype=np.uint8) # only use boxes that are visible in camera valid_box_tokens = [box.token for box in boxes_front_cam] boxes = [box for box in boxes_lidar if box.token in valid_box_tokens] for box in boxes: # get points that lie inside of the box fg_mask = points_in_box(box, pts) det_class = category_to_detection_name(box.name) if det_class is not None: seg_labels[fg_mask] = class_names_to_id[det_class] # convert to relative path lidar_path = lidar_path.replace(root_dir + '/', '') cam_path = cam_path.replace(root_dir + '/', '') # transpose to yield shape (num_points, 3) pts = pts.T # append data to train, val or test list in pkl_dict data_dict = { 'points': pts, 'seg_labels': seg_labels, 'points_img': pts_img, # row, col format, shape: (num_points, 2) 'lidar_path': lidar_path, 'camera_path': cam_path, 'boxes': boxes_lidar, "sample_token": sample["token"], "scene_name": curr_scene_name, "calib": calib_infos } pkl_dict[curr_split].append(data_dict) # save to pickle file save_dir = osp.join(out_dir, 'preprocess') os.makedirs(save_dir, exist_ok=True) for split_name in split_names: save_path = osp.join(save_dir, '{}{}.pkl'.format(split_name, '_' + subset_name if subset_name else '')) with open(save_path, 'wb') as f: pickle.dump(pkl_dict[split_name], f) print('Wrote preprocessed data to ' + save_path) if __name__ == '__main__': root_dir = '/datasets_master/nuscenes' out_dir = '/datasets_local/datasets_mjaritz/nuscenes_preprocess' nusc = NuScenes(version='v1.0-trainval', dataroot=root_dir, verbose=True) # for faster debugging, the script can be run using the mini dataset # nusc = NuScenes(version='v1.0-mini', dataroot=root_dir, verbose=True) # We construct the splits by using the meta data of NuScenes: # USA/Singapore: We check if the location is Boston or Singapore. # Day/Night: We detect if "night" occurs in the scene description string. preprocess(nusc, ['train', 'test'], root_dir, out_dir, location='boston', subset_name='usa') preprocess(nusc, ['train', 'val', 'test'], root_dir, out_dir, location='singapore', subset_name='singapore') preprocess(nusc, ['train', 'test'], root_dir, out_dir, keyword='night', keyword_action='exclude', subset_name='day') preprocess(nusc, ['train', 'val', 'test'], root_dir, out_dir, keyword='night', keyword_action='filter', subset_name='night') ================================================ FILE: xmuda/data/nuscenes/projection.py ================================================ import numpy as np from pyquaternion import Quaternion from nuscenes.utils.geometry_utils import view_points import matplotlib.pyplot as plt # modified from https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/nuscenes.py def map_pointcloud_to_image(pc, im_shape, info, im=None): """ Maps the lidar point cloud to the image. 
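    Chain of transforms, matching the numbered steps in the body (homogeneous
    notation, names corresponding to the keys of `info`):

        p_cam = T_cam2ego^-1 @ T_ego2global_cam^-1 @ T_ego2global_lidar @ T_lidar2ego @ p_lidar
        (u, v) = (cam_intrinsic @ p_cam)[:2] / depth, with depth = p_cam[2]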
:param pc: (3, N) :param im_shape: image to check size and debug :param info: dict with calibration infos :param im: image, only for visualization :return: """ pc = pc.copy() # Points live in the point sensor frame. So they need to be transformed via global to the image plane. # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep. pc = Quaternion(info['lidar2ego_rotation']).rotation_matrix @ pc pc = pc + np.array(info['lidar2ego_translation'])[:, np.newaxis] # Second step: transform to the global frame. pc = Quaternion(info['ego2global_rotation_lidar']).rotation_matrix @ pc pc = pc + np.array(info['ego2global_translation_lidar'])[:, np.newaxis] # Third step: transform into the ego vehicle frame for the timestamp of the image. pc = pc - np.array(info['ego2global_translation_cam'])[:, np.newaxis] pc = Quaternion(info['ego2global_rotation_cam']).rotation_matrix.T @ pc # Fourth step: transform into the camera. pc = pc - np.array(info['cam2ego_translation'])[:, np.newaxis] pc = Quaternion(info['cam2ego_rotation']).rotation_matrix.T @ pc # Fifth step: actually take a "picture" of the point cloud. # Grab the depths (camera frame z axis points away from the camera). depths = pc[2, :] # Take the actual picture (matrix multiplication with camera-matrix + renormalization). points = view_points(pc, np.array(info['cam_intrinsic']), normalize=True) # Cast to float32 to prevent later rounding errors points = points.astype(np.float32) # Remove points that are either outside or behind the camera. mask = np.ones(depths.shape[0], dtype=bool) mask = np.logical_and(mask, depths > 0) mask = np.logical_and(mask, points[0, :] > 0) mask = np.logical_and(mask, points[0, :] < im_shape[1]) mask = np.logical_and(mask, points[1, :] > 0) mask = np.logical_and(mask, points[1, :] < im_shape[0]) points = points[:, mask] # debug if im is not None: # Retrieve the color from the depth. coloring = depths coloring = coloring[mask] plt.figure(figsize=(9, 16)) plt.imshow(im) plt.scatter(points[0, :], points[1, :], c=coloring, s=2) plt.axis('off') # plt.show() return mask, pc.T, points.T[:, :2] ================================================ FILE: xmuda/data/nuscenes/splits.py ================================================ # Official training set in NuScenes. We split scenes either into USA/Singapore or Day/Night. 
train = \ ['scene-0001', 'scene-0002', 'scene-0004', 'scene-0005', 'scene-0006', 'scene-0007', 'scene-0008', 'scene-0009', 'scene-0010', 'scene-0011', 'scene-0019', 'scene-0020', 'scene-0021', 'scene-0022', 'scene-0023', 'scene-0024', 'scene-0025', 'scene-0026', 'scene-0027', 'scene-0028', 'scene-0029', 'scene-0030', 'scene-0031', 'scene-0032', 'scene-0033', 'scene-0034', 'scene-0041', 'scene-0042', 'scene-0043', 'scene-0044', 'scene-0045', 'scene-0046', 'scene-0047', 'scene-0048', 'scene-0049', 'scene-0050', 'scene-0051', 'scene-0052', 'scene-0053', 'scene-0054', 'scene-0055', 'scene-0056', 'scene-0057', 'scene-0058', 'scene-0059', 'scene-0060', 'scene-0061', 'scene-0062', 'scene-0063', 'scene-0064', 'scene-0065', 'scene-0066', 'scene-0067', 'scene-0068', 'scene-0069', 'scene-0070', 'scene-0071', 'scene-0072', 'scene-0073', 'scene-0074', 'scene-0075', 'scene-0076', 'scene-0120', 'scene-0121', 'scene-0122', 'scene-0123', 'scene-0124', 'scene-0125', 'scene-0126', 'scene-0127', 'scene-0128', 'scene-0129', 'scene-0130', 'scene-0131', 'scene-0132', 'scene-0133', 'scene-0134', 'scene-0135', 'scene-0138', 'scene-0139', 'scene-0149', 'scene-0150', 'scene-0151', 'scene-0152', 'scene-0154', 'scene-0155', 'scene-0157', 'scene-0158', 'scene-0159', 'scene-0160', 'scene-0161', 'scene-0162', 'scene-0163', 'scene-0164', 'scene-0165', 'scene-0166', 'scene-0167', 'scene-0168', 'scene-0170', 'scene-0171', 'scene-0172', 'scene-0173', 'scene-0174', 'scene-0175', 'scene-0176', 'scene-0177', 'scene-0178', 'scene-0179', 'scene-0180', 'scene-0181', 'scene-0182', 'scene-0183', 'scene-0184', 'scene-0185', 'scene-0187', 'scene-0188', 'scene-0190', 'scene-0191', 'scene-0192', 'scene-0193', 'scene-0194', 'scene-0195', 'scene-0196', 'scene-0199', 'scene-0200', 'scene-0202', 'scene-0203', 'scene-0204', 'scene-0206', 'scene-0207', 'scene-0208', 'scene-0209', 'scene-0210', 'scene-0211', 'scene-0212', 'scene-0213', 'scene-0214', 'scene-0218', 'scene-0219', 'scene-0220', 'scene-0222', 'scene-0224', 'scene-0225', 'scene-0226', 'scene-0227', 'scene-0228', 'scene-0229', 'scene-0230', 'scene-0231', 'scene-0232', 'scene-0233', 'scene-0234', 'scene-0235', 'scene-0236', 'scene-0237', 'scene-0238', 'scene-0239', 'scene-0240', 'scene-0241', 'scene-0242', 'scene-0243', 'scene-0244', 'scene-0245', 'scene-0246', 'scene-0247', 'scene-0248', 'scene-0249', 'scene-0250', 'scene-0251', 'scene-0252', 'scene-0253', 'scene-0254', 'scene-0255', 'scene-0256', 'scene-0257', 'scene-0258', 'scene-0259', 'scene-0260', 'scene-0261', 'scene-0262', 'scene-0263', 'scene-0264', 'scene-0283', 'scene-0284', 'scene-0285', 'scene-0286', 'scene-0287', 'scene-0288', 'scene-0289', 'scene-0290', 'scene-0291', 'scene-0292', 'scene-0293', 'scene-0294', 'scene-0295', 'scene-0296', 'scene-0297', 'scene-0298', 'scene-0299', 'scene-0300', 'scene-0301', 'scene-0302', 'scene-0303', 'scene-0304', 'scene-0305', 'scene-0306', 'scene-0315', 'scene-0316', 'scene-0317', 'scene-0318', 'scene-0321', 'scene-0323', 'scene-0324', 'scene-0328', 'scene-0347', 'scene-0348', 'scene-0349', 'scene-0350', 'scene-0351', 'scene-0352', 'scene-0353', 'scene-0354', 'scene-0355', 'scene-0356', 'scene-0357', 'scene-0358', 'scene-0359', 'scene-0360', 'scene-0361', 'scene-0362', 'scene-0363', 'scene-0364', 'scene-0365', 'scene-0366', 'scene-0367', 'scene-0368', 'scene-0369', 'scene-0370', 'scene-0371', 'scene-0372', 'scene-0373', 'scene-0374', 'scene-0375', 'scene-0376', 'scene-0377', 'scene-0378', 'scene-0379', 'scene-0380', 'scene-0381', 'scene-0382', 'scene-0383', 'scene-0384', 'scene-0385', 
'scene-0386', 'scene-0388', 'scene-0389', 'scene-0390', 'scene-0391', 'scene-0392', 'scene-0393', 'scene-0394', 'scene-0395', 'scene-0396', 'scene-0397', 'scene-0398', 'scene-0399', 'scene-0400', 'scene-0401', 'scene-0402', 'scene-0403', 'scene-0405', 'scene-0406', 'scene-0407', 'scene-0408', 'scene-0410', 'scene-0411', 'scene-0412', 'scene-0413', 'scene-0414', 'scene-0415', 'scene-0416', 'scene-0417', 'scene-0418', 'scene-0419', 'scene-0420', 'scene-0421', 'scene-0422', 'scene-0423', 'scene-0424', 'scene-0425', 'scene-0426', 'scene-0427', 'scene-0428', 'scene-0429', 'scene-0430', 'scene-0431', 'scene-0432', 'scene-0433', 'scene-0434', 'scene-0435', 'scene-0436', 'scene-0437', 'scene-0438', 'scene-0439', 'scene-0440', 'scene-0441', 'scene-0442', 'scene-0443', 'scene-0444', 'scene-0445', 'scene-0446', 'scene-0447', 'scene-0448', 'scene-0449', 'scene-0450', 'scene-0451', 'scene-0452', 'scene-0453', 'scene-0454', 'scene-0455', 'scene-0456', 'scene-0457', 'scene-0458', 'scene-0459', 'scene-0461', 'scene-0462', 'scene-0463', 'scene-0464', 'scene-0465', 'scene-0467', 'scene-0468', 'scene-0469', 'scene-0471', 'scene-0472', 'scene-0474', 'scene-0475', 'scene-0476', 'scene-0477', 'scene-0478', 'scene-0479', 'scene-0480', 'scene-0499', 'scene-0500', 'scene-0501', 'scene-0502', 'scene-0504', 'scene-0505', 'scene-0506', 'scene-0507', 'scene-0508', 'scene-0509', 'scene-0510', 'scene-0511', 'scene-0512', 'scene-0513', 'scene-0514', 'scene-0515', 'scene-0517', 'scene-0518', 'scene-0525', 'scene-0526', 'scene-0527', 'scene-0528', 'scene-0529', 'scene-0530', 'scene-0531', 'scene-0532', 'scene-0533', 'scene-0534', 'scene-0535', 'scene-0536', 'scene-0537', 'scene-0538', 'scene-0539', 'scene-0541', 'scene-0542', 'scene-0543', 'scene-0544', 'scene-0545', 'scene-0546', 'scene-0566', 'scene-0568', 'scene-0570', 'scene-0571', 'scene-0572', 'scene-0573', 'scene-0574', 'scene-0575', 'scene-0576', 'scene-0577', 'scene-0578', 'scene-0580', 'scene-0582', 'scene-0583', 'scene-0584', 'scene-0585', 'scene-0586', 'scene-0587', 'scene-0588', 'scene-0589', 'scene-0590', 'scene-0591', 'scene-0592', 'scene-0593', 'scene-0594', 'scene-0595', 'scene-0596', 'scene-0597', 'scene-0598', 'scene-0599', 'scene-0600', 'scene-0639', 'scene-0640', 'scene-0641', 'scene-0642', 'scene-0643', 'scene-0644', 'scene-0645', 'scene-0646', 'scene-0647', 'scene-0648', 'scene-0649', 'scene-0650', 'scene-0651', 'scene-0652', 'scene-0653', 'scene-0654', 'scene-0655', 'scene-0656', 'scene-0657', 'scene-0658', 'scene-0659', 'scene-0660', 'scene-0661', 'scene-0662', 'scene-0663', 'scene-0664', 'scene-0665', 'scene-0666', 'scene-0667', 'scene-0668', 'scene-0669', 'scene-0670', 'scene-0671', 'scene-0672', 'scene-0673', 'scene-0674', 'scene-0675', 'scene-0676', 'scene-0677', 'scene-0678', 'scene-0679', 'scene-0681', 'scene-0683', 'scene-0684', 'scene-0685', 'scene-0686', 'scene-0687', 'scene-0688', 'scene-0689', 'scene-0695', 'scene-0696', 'scene-0697', 'scene-0698', 'scene-0700', 'scene-0701', 'scene-0703', 'scene-0704', 'scene-0705', 'scene-0706', 'scene-0707', 'scene-0708', 'scene-0709', 'scene-0710', 'scene-0711', 'scene-0712', 'scene-0713', 'scene-0714', 'scene-0715', 'scene-0716', 'scene-0717', 'scene-0718', 'scene-0719', 'scene-0726', 'scene-0727', 'scene-0728', 'scene-0730', 'scene-0731', 'scene-0733', 'scene-0734', 'scene-0735', 'scene-0736', 'scene-0737', 'scene-0738', 'scene-0739', 'scene-0740', 'scene-0741', 'scene-0744', 'scene-0746', 'scene-0747', 'scene-0749', 'scene-0750', 'scene-0751', 'scene-0752', 'scene-0757', 'scene-0758', 
'scene-0759', 'scene-0760', 'scene-0761', 'scene-0762', 'scene-0763', 'scene-0764', 'scene-0765', 'scene-0767', 'scene-0768', 'scene-0769', 'scene-0786', 'scene-0787', 'scene-0789', 'scene-0790', 'scene-0791', 'scene-0792', 'scene-0803', 'scene-0804', 'scene-0805', 'scene-0806', 'scene-0808', 'scene-0809', 'scene-0810', 'scene-0811', 'scene-0812', 'scene-0813', 'scene-0815', 'scene-0816', 'scene-0817', 'scene-0819', 'scene-0820', 'scene-0821', 'scene-0822', 'scene-0847', 'scene-0848', 'scene-0849', 'scene-0850', 'scene-0851', 'scene-0852', 'scene-0853', 'scene-0854', 'scene-0855', 'scene-0856', 'scene-0858', 'scene-0860', 'scene-0861', 'scene-0862', 'scene-0863', 'scene-0864', 'scene-0865', 'scene-0866', 'scene-0868', 'scene-0869', 'scene-0870', 'scene-0871', 'scene-0872', 'scene-0873', 'scene-0875', 'scene-0876', 'scene-0877', 'scene-0878', 'scene-0880', 'scene-0882', 'scene-0883', 'scene-0884', 'scene-0885', 'scene-0886', 'scene-0887', 'scene-0888', 'scene-0889', 'scene-0890', 'scene-0891', 'scene-0892', 'scene-0893', 'scene-0894', 'scene-0895', 'scene-0896', 'scene-0897', 'scene-0898', 'scene-0899', 'scene-0900', 'scene-0901', 'scene-0902', 'scene-0903', 'scene-0945', 'scene-0947', 'scene-0949', 'scene-0952', 'scene-0953', 'scene-0955', 'scene-0956', 'scene-0957', 'scene-0958', 'scene-0959', 'scene-0960', 'scene-0961', 'scene-0975', 'scene-0976', 'scene-0977', 'scene-0978', 'scene-0979', 'scene-0980', 'scene-0981', 'scene-0982', 'scene-0983', 'scene-0984', 'scene-0988', 'scene-0989', 'scene-0990', 'scene-0991', 'scene-0992', 'scene-0994', 'scene-0995', 'scene-0996', 'scene-0997', 'scene-0998', 'scene-0999', 'scene-1000', 'scene-1001', 'scene-1002', 'scene-1003', 'scene-1004', 'scene-1005', 'scene-1006', 'scene-1007', 'scene-1008', 'scene-1009', 'scene-1010', 'scene-1011', 'scene-1012', 'scene-1013', 'scene-1014', 'scene-1015', 'scene-1016', 'scene-1017', 'scene-1018', 'scene-1019', 'scene-1020', 'scene-1021', 'scene-1022', 'scene-1023', 'scene-1024', 'scene-1025', 'scene-1044', 'scene-1045', 'scene-1046', 'scene-1047', 'scene-1048', 'scene-1049', 'scene-1050', 'scene-1051', 'scene-1052', 'scene-1053', 'scene-1054', 'scene-1055', 'scene-1056', 'scene-1057', 'scene-1058', 'scene-1074', 'scene-1075', 'scene-1076', 'scene-1077', 'scene-1078', 'scene-1079', 'scene-1080', 'scene-1081', 'scene-1082', 'scene-1083', 'scene-1084', 'scene-1085', 'scene-1086', 'scene-1087', 'scene-1088', 'scene-1089', 'scene-1090', 'scene-1091', 'scene-1092', 'scene-1093', 'scene-1094', 'scene-1095', 'scene-1096', 'scene-1097', 'scene-1098', 'scene-1099', 'scene-1100', 'scene-1101', 'scene-1102', 'scene-1104', 'scene-1105', 'scene-1106', 'scene-1107', 'scene-1108', 'scene-1109', 'scene-1110'] # We use the official validation set as test set. We split scenes either into USA/Singapore or Day/Night. 
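# These scene lists are consumed as split tuples by the dataloaders, which load
# one preprocessed pickle per split name, e.g. (directory paths are placeholders):
#
#   dataset = NuScenesSCN(split=('train_usa',), preprocess_dir='...', nuscenes_dir='...')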
val = [] test = \ ['scene-0003', 'scene-0012', 'scene-0013', 'scene-0014', 'scene-0015', 'scene-0016', 'scene-0017', 'scene-0018', 'scene-0035', 'scene-0036', 'scene-0038', 'scene-0039', 'scene-0092', 'scene-0093', 'scene-0094', 'scene-0095', 'scene-0096', 'scene-0097', 'scene-0098', 'scene-0099', 'scene-0100', 'scene-0101', 'scene-0102', 'scene-0103', 'scene-0104', 'scene-0105', 'scene-0106', 'scene-0107', 'scene-0108', 'scene-0109', 'scene-0110', 'scene-0221', 'scene-0268', 'scene-0269', 'scene-0270', 'scene-0271', 'scene-0272', 'scene-0273', 'scene-0274', 'scene-0275', 'scene-0276', 'scene-0277', 'scene-0278', 'scene-0329', 'scene-0330', 'scene-0331', 'scene-0332', 'scene-0344', 'scene-0345', 'scene-0346', 'scene-0519', 'scene-0520', 'scene-0521', 'scene-0522', 'scene-0523', 'scene-0524', 'scene-0552', 'scene-0553', 'scene-0554', 'scene-0555', 'scene-0556', 'scene-0557', 'scene-0558', 'scene-0559', 'scene-0560', 'scene-0561', 'scene-0562', 'scene-0563', 'scene-0564', 'scene-0565', 'scene-0625', 'scene-0626', 'scene-0627', 'scene-0629', 'scene-0630', 'scene-0632', 'scene-0633', 'scene-0634', 'scene-0635', 'scene-0636', 'scene-0637', 'scene-0638', 'scene-0770', 'scene-0771', 'scene-0775', 'scene-0777', 'scene-0778', 'scene-0780', 'scene-0781', 'scene-0782', 'scene-0783', 'scene-0784', 'scene-0794', 'scene-0795', 'scene-0796', 'scene-0797', 'scene-0798', 'scene-0799', 'scene-0800', 'scene-0802', 'scene-0904', 'scene-0905', 'scene-0906', 'scene-0907', 'scene-0908', 'scene-0909', 'scene-0910', 'scene-0911', 'scene-0912', 'scene-0913', 'scene-0914', 'scene-0915', 'scene-0916', 'scene-0917', 'scene-0919', 'scene-0920', 'scene-0921', 'scene-0922', 'scene-0923', 'scene-0924', 'scene-0925', 'scene-0926', 'scene-0927', 'scene-0928', 'scene-0929', 'scene-0930', 'scene-0931', 'scene-0962', 'scene-0963', 'scene-0966', 'scene-0967', 'scene-0968', 'scene-0969', 'scene-0971', 'scene-0972', 'scene-1059', 'scene-1060', 'scene-1061', 'scene-1062', 'scene-1063', 'scene-1064', 'scene-1065', 'scene-1066', 'scene-1067', 'scene-1068', 'scene-1069', 'scene-1070', 'scene-1071', 'scene-1072', 'scene-1073'] # Exclude some scenes from the training set to use for validation. Depends on split (Day/Night, USA/Singapore). # Note that, we do not produce a validation set on the source datasets (Day, USA), as we validate on target # (Night, Singapore) during training. 
val_night = [ 'scene-1044', 'scene-1045', 'scene-1046', 'scene-1047', 'scene-1048', 'scene-1049', 'scene-1050', 'scene-1051', 'scene-1052', 'scene-1053', 'scene-1054', 'scene-1055', 'scene-1056', 'scene-1057', 'scene-1058' ] val_singapore = [ 'scene-0004', 'scene-0005', 'scene-0006', 'scene-0007', 'scene-0008', 'scene-0009', 'scene-0010', 'scene-0011', 'scene-0045', 'scene-0046', 'scene-0047', 'scene-0048', 'scene-0049', 'scene-0050', 'scene-0051', 'scene-0052', 'scene-0053', 'scene-0054', 'scene-0347', 'scene-0348', 'scene-0349', 'scene-0356', 'scene-0357', 'scene-0358', 'scene-0359', 'scene-0786', 'scene-0787', 'scene-0789', 'scene-0790', 'scene-0791', 'scene-0792', 'scene-0847', 'scene-0848', 'scene-0849', 'scene-0850', 'scene-0851', 'scene-0852', 'scene-0853', 'scene-0854', 'scene-0855', 'scene-0856', 'scene-0858', 'scene-0860', 'scene-0861', 'scene-0862', 'scene-0863', 'scene-0864', 'scene-0865', 'scene-0866', 'scene-0975', 'scene-0976', 'scene-0977', 'scene-0978', 'scene-0979', 'scene-0980', 'scene-0981', 'scene-0982', 'scene-0983', 'scene-0984', 'scene-0988', 'scene-0989', 'scene-0990', 'scene-0991', 'scene-1044', 'scene-1106', 'scene-1107', 'scene-1108', 'scene-1109', 'scene-1110', ] ================================================ FILE: xmuda/data/semantic_kitti/preprocess.py ================================================ import os import os.path as osp import numpy as np import pickle from PIL import Image import glob import torch from torch.utils.data import Dataset from torch.utils.data.dataloader import DataLoader from xmuda.data.semantic_kitti import splits # prevent "RuntimeError: received 0 items of ancdata" torch.multiprocessing.set_sharing_strategy('file_system') class DummyDataset(Dataset): """Use torch dataloader for multiprocessing""" def __init__(self, root_dir, scenes): self.root_dir = root_dir self.data = [] self.glob_frames(scenes) def glob_frames(self, scenes): for scene in scenes: glob_path = osp.join(self.root_dir, 'dataset', 'sequences', scene, 'image_2', '*.png') cam_paths = sorted(glob.glob(glob_path)) # load calibration calib = self.read_calib(osp.join(self.root_dir, 'dataset', 'sequences', scene, 'calib.txt')) proj_matrix = calib['P2'] @ calib['Tr'] proj_matrix = proj_matrix.astype(np.float32) for cam_path in cam_paths: basename = osp.basename(cam_path) frame_id = osp.splitext(basename)[0] assert frame_id.isdigit() data = { 'camera_path': cam_path, 'lidar_path': osp.join(self.root_dir, 'dataset', 'sequences', scene, 'velodyne', frame_id + '.bin'), 'label_path': osp.join(self.root_dir, 'dataset', 'sequences', scene, 'labels', frame_id + '.label'), 'proj_matrix': proj_matrix } for k, v in data.items(): if isinstance(v, str): if not osp.exists(v): raise IOError('File not found {}'.format(v)) self.data.append(data) @staticmethod def read_calib(calib_path): """ :param calib_path: Path to a calibration text file. :return: dict with calibration matrices. 
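        Each line of calib.txt has the form "KEY: v1 v2 ... v12". 'P2' reshapes to
        the 3x4 left-camera projection matrix and 'Tr' to the 3x4 lidar-to-camera
        transform, padded to 4x4 so that P2 @ Tr maps homogeneous lidar points
        straight onto the image plane, e.g. (illustrative):

            uvw = (calib['P2'] @ calib['Tr']) @ np.array([x, y, z, 1.0])
            u, v = uvw[:2] / uvw[2]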
""" calib_all = {} with open(calib_path, 'r') as f: for line in f.readlines(): if line == '\n': break key, value = line.split(':', 1) calib_all[key] = np.array([float(x) for x in value.split()]) # reshape matrices calib_out = {} calib_out['P2'] = calib_all['P2'].reshape(3, 4) # 3x4 projection matrix for left camera calib_out['Tr'] = np.identity(4) # 4x4 matrix calib_out['Tr'][:3, :4] = calib_all['Tr'].reshape(3, 4) return calib_out @staticmethod def select_points_in_frustum(points_2d, x1, y1, x2, y2): """ Select points in a 2D frustum parametrized by x1, y1, x2, y2 in image coordinates :param points_2d: point cloud projected into 2D :param points_3d: point cloud :param x1: left bound :param y1: upper bound :param x2: right bound :param y2: lower bound :return: points (2D and 3D) that are in the frustum """ keep_ind = (points_2d[:, 0] > x1) * \ (points_2d[:, 1] > y1) * \ (points_2d[:, 0] < x2) * \ (points_2d[:, 1] < y2) return keep_ind def __getitem__(self, index): data_dict = self.data[index].copy() scan = np.fromfile(data_dict['lidar_path'], dtype=np.float32) scan = scan.reshape((-1, 4)) points = scan[:, :3] label = np.fromfile(data_dict['label_path'], dtype=np.uint32) label = label.reshape((-1)) label = label & 0xFFFF # get lower half for semantics # load image image = Image.open(data_dict['camera_path']) image_size = image.size # project points into image keep_idx = points[:, 0] > 0 # only keep point in front of the vehicle points_hcoords = np.concatenate([points[keep_idx], np.ones([keep_idx.sum(), 1], dtype=np.float32)], axis=1) img_points = (data_dict['proj_matrix'] @ points_hcoords.T).T img_points = img_points[:, :2] / np.expand_dims(img_points[:, 2], axis=1) # scale 2D points keep_idx_img_pts = self.select_points_in_frustum(img_points, 0, 0, *image_size) keep_idx[keep_idx] = keep_idx_img_pts # fliplr so that indexing is row, col and not col, row img_points = np.fliplr(img_points) # debug # from xmuda.data.utils.visualize import draw_points_image, draw_bird_eye_view # draw_points_image(np.array(image), img_points[keep_idx_img_pts].astype(int), label[keep_idx], # color_palette_type='SemanticKITTI_long') data_dict['seg_label'] = label[keep_idx].astype(np.int16) data_dict['points'] = points[keep_idx] data_dict['points_img'] = img_points[keep_idx_img_pts] data_dict['image_size'] = np.array(image_size) return data_dict def __len__(self): return len(self.data) def preprocess(split_name, root_dir, out_dir): pkl_data = [] split = getattr(splits, split_name) dataloader = DataLoader(DummyDataset(root_dir, split), num_workers=8) num_skips = 0 for i, data_dict in enumerate(dataloader): # data error leads to returning empty dict if not data_dict: print('empty dict, continue') num_skips += 1 continue for k, v in data_dict.items(): data_dict[k] = v[0] print('{}/{} {}'.format(i, len(dataloader), data_dict['lidar_path'])) # convert to relative path lidar_path = data_dict['lidar_path'].replace(root_dir + '/', '') cam_path = data_dict['camera_path'].replace(root_dir + '/', '') # append data out_dict = { 'points': data_dict['points'].numpy(), 'seg_labels': data_dict['seg_label'].numpy(), 'points_img': data_dict['points_img'].numpy(), # row, col format, shape: (num_points, 2) 'lidar_path': lidar_path, 'camera_path': cam_path, 'image_size': tuple(data_dict['image_size'].numpy()) } pkl_data.append(out_dict) print('Skipped {} files'.format(num_skips)) # save to pickle file save_dir = osp.join(out_dir, 'preprocess') os.makedirs(save_dir, exist_ok=True) save_path = osp.join(save_dir, 
================================================
FILE: xmuda/data/semantic_kitti/semantic_kitti_dataloader.py
================================================
import os.path as osp
import pickle
from PIL import Image
import numpy as np

from torch.utils.data import Dataset
from torchvision import transforms as T

from xmuda.data.utils.refine_pseudo_labels import refine_pseudo_labels
from xmuda.data.utils.augmentation_3d import augment_and_scale_3d


class SemanticKITTIBase(Dataset):
    """SemanticKITTI dataset"""

    # https://github.com/PRBonn/semantic-kitti-api/blob/master/config/semantic-kitti.yaml
    id_to_class_name = {
        0: "unlabeled",
        1: "outlier",
        10: "car",
        11: "bicycle",
        13: "bus",
        15: "motorcycle",
        16: "on-rails",
        18: "truck",
        20: "other-vehicle",
        30: "person",
        31: "bicyclist",
        32: "motorcyclist",
        40: "road",
        44: "parking",
        48: "sidewalk",
        49: "other-ground",
        50: "building",
        51: "fence",
        52: "other-structure",
        60: "lane-marking",
        70: "vegetation",
        71: "trunk",
        72: "terrain",
        80: "pole",
        81: "traffic-sign",
        99: "other-object",
        252: "moving-car",
        253: "moving-bicyclist",
        254: "moving-person",
        255: "moving-motorcyclist",
        256: "moving-on-rails",
        257: "moving-bus",
        258: "moving-truck",
        259: "moving-other-vehicle",
    }

    class_name_to_id = {v: k for k, v in id_to_class_name.items()}

    # use these categories if merge_classes == True (common with A2D2)
    categories = {
        'car': ['car', 'moving-car'],
        'truck': ['truck', 'moving-truck'],
        'bike': ['bicycle', 'motorcycle', 'bicyclist', 'motorcyclist',
                 'moving-bicyclist', 'moving-motorcyclist'],  # riders are labeled as bikes in Audi dataset
        'person': ['person', 'moving-person'],
        'road': ['road', 'lane-marking'],
        'parking': ['parking'],
        'sidewalk': ['sidewalk'],
        'building': ['building'],
        'nature': ['vegetation', 'trunk', 'terrain'],
        'other-objects': ['fence', 'pole', 'traffic-sign', 'other-object'],
    }

    def __init__(self,
                 split,
                 preprocess_dir,
                 merge_classes=False,
                 pselab_paths=None
                 ):

        self.split = split
        self.preprocess_dir = preprocess_dir

        print("Initialize SemanticKITTI dataloader")

        assert isinstance(split, tuple)
        print('Load', split)
        self.data = []
        for curr_split in split:
            with open(osp.join(self.preprocess_dir, curr_split + '.pkl'), 'rb') as f:
                self.data.extend(pickle.load(f))

        self.pselab_data = None
        if pselab_paths:
            assert isinstance(pselab_paths, tuple)
            print('Load pseudo label data ', pselab_paths)
            self.pselab_data = []
            for curr_split in pselab_paths:
                self.pselab_data.extend(np.load(curr_split, allow_pickle=True))

            # check consistency of data and pseudo labels
            assert len(self.pselab_data) == len(self.data)
            for i in range(len(self.pselab_data)):
                assert len(self.pselab_data[i]['pseudo_label_2d']) == len(self.data[i]['seg_labels'])

            # refine 2d pseudo labels
            # note: np.int64 instead of the np.int alias, which was removed in NumPy 1.24
            probs2d = np.concatenate([data['probs_2d'] for data in self.pselab_data])
            pseudo_label_2d = np.concatenate([data['pseudo_label_2d'] for data in self.pselab_data]).astype(np.int64)
            pseudo_label_2d = refine_pseudo_labels(probs2d, pseudo_label_2d)

            # refine 3d pseudo labels
            # fusion model has only one final prediction saved in probs_2d
            if 'probs_3d' in self.pselab_data[0].keys():
                probs3d = np.concatenate([data['probs_3d'] for data in self.pselab_data])
                pseudo_label_3d = np.concatenate([data['pseudo_label_3d'] for data in self.pselab_data]).astype(np.int64)
                pseudo_label_3d = refine_pseudo_labels(probs3d, pseudo_label_3d)
            else:
                pseudo_label_3d = None

            # undo concat
            left_idx = 0
            for data_idx in range(len(self.pselab_data)):
                right_idx = left_idx + len(self.pselab_data[data_idx]['probs_2d'])
                self.pselab_data[data_idx]['pseudo_label_2d'] = pseudo_label_2d[left_idx:right_idx]
                if pseudo_label_3d is not None:
                    self.pselab_data[data_idx]['pseudo_label_3d'] = pseudo_label_3d[left_idx:right_idx]
                else:
                    self.pselab_data[data_idx]['pseudo_label_3d'] = None
                left_idx = right_idx

        if merge_classes:
            highest_id = list(self.id_to_class_name.keys())[-1]
            self.label_mapping = -100 * np.ones(highest_id + 2, dtype=int)
            for cat_idx, cat_list in enumerate(self.categories.values()):
                for class_name in cat_list:
                    self.label_mapping[self.class_name_to_id[class_name]] = cat_idx
            self.class_names = list(self.categories.keys())
        else:
            self.label_mapping = None

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        return len(self.data)
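# --- Editor's note (not part of the original file) ---------------------------
# A minimal sketch of how label_mapping behaves when merge_classes=True.
# Category indices follow the insertion order of the `categories` dict above
# (car=0, truck=1, bike=2, person=3, road=4, parking=5, sidewalk=6,
# building=7, nature=8, other-objects=9); raw ids that belong to no merged
# category (e.g. 'unlabeled'=0, 'outlier'=1) keep the ignore value -100.
#
#   dataset = SemanticKITTIBase(('train',), preprocess_dir, merge_classes=True)
#   raw_ids = np.array([10, 252, 40, 0])   # car, moving-car, road, unlabeled
#   print(dataset.label_mapping[raw_ids])  # -> [   0    0    4 -100]
# ------------------------------------------------------------------------------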
class SemanticKITTISCN(SemanticKITTIBase):
    def __init__(self,
                 split,
                 preprocess_dir,
                 semantic_kitti_dir='',
                 pselab_paths=None,
                 merge_classes=False,
                 scale=20,
                 full_scale=4096,
                 image_normalizer=None,
                 noisy_rot=0.0,  # 3D augmentation
                 flip_y=0.0,  # 3D augmentation
                 rot_z=0.0,  # 3D augmentation
                 transl=False,  # 3D augmentation
                 bottom_crop=tuple(),  # 2D augmentation (also affects 3D)
                 fliplr=0.0,  # 2D augmentation
                 color_jitter=None,  # 2D augmentation
                 output_orig=False
                 ):
        super().__init__(split,
                         preprocess_dir,
                         merge_classes=merge_classes,
                         pselab_paths=pselab_paths)

        self.semantic_kitti_dir = semantic_kitti_dir
        self.output_orig = output_orig

        # point cloud parameters
        self.scale = scale
        self.full_scale = full_scale
        # 3D augmentation
        self.noisy_rot = noisy_rot
        self.flip_y = flip_y
        self.rot_z = rot_z
        self.transl = transl

        # image parameters
        self.image_normalizer = image_normalizer
        # 2D augmentation
        self.bottom_crop = bottom_crop
        self.fliplr = fliplr
        self.color_jitter = T.ColorJitter(*color_jitter) if color_jitter else None

    def __getitem__(self, index):
        data_dict = self.data[index]

        points = data_dict['points'].copy()
        seg_label = data_dict['seg_labels'].astype(np.int64)

        if self.label_mapping is not None:
            seg_label = self.label_mapping[seg_label]

        out_dict = {}

        keep_idx = np.ones(len(points), dtype=bool)
        points_img = data_dict['points_img'].copy()
        img_path = osp.join(self.semantic_kitti_dir, data_dict['camera_path'])
        image = Image.open(img_path)

        if self.bottom_crop:
            # self.bottom_crop is a tuple (crop_width, crop_height)
            left = int(np.random.rand() * (image.size[0] + 1 - self.bottom_crop[0]))
            right = left + self.bottom_crop[0]
            top = image.size[1] - self.bottom_crop[1]
            bottom = image.size[1]

            # update image points
            keep_idx = points_img[:, 0] >= top
            keep_idx = np.logical_and(keep_idx, points_img[:, 0] < bottom)
            keep_idx = np.logical_and(keep_idx, points_img[:, 1] >= left)
            keep_idx = np.logical_and(keep_idx, points_img[:, 1] < right)

            # crop image
            image = image.crop((left, top, right, bottom))
            points_img = points_img[keep_idx]
            points_img[:, 0] -= top
            points_img[:, 1] -= left

            # update point cloud
            points = points[keep_idx]
            seg_label = seg_label[keep_idx]

        img_indices = points_img.astype(np.int64)

        # 2D augmentation
        if self.color_jitter is not None:
            image = self.color_jitter(image)
        # PIL to numpy
        image = np.array(image, dtype=np.float32, copy=False) / 255.
        # 2D augmentation
        if np.random.rand() < self.fliplr:
            image = np.ascontiguousarray(np.fliplr(image))
            img_indices[:, 1] = image.shape[1] - 1 - img_indices[:, 1]

        # normalize image
        if self.image_normalizer:
            mean, std = self.image_normalizer
            mean = np.asarray(mean, dtype=np.float32)
            std = np.asarray(std, dtype=np.float32)
            image = (image - mean) / std

        out_dict['img'] = np.moveaxis(image, -1, 0)
        out_dict['img_indices'] = img_indices

        # 3D data augmentation and scaling from points to voxel indices
        # Kitti lidar coordinates: x (front), y (left), z (up)
        coords = augment_and_scale_3d(points, self.scale, self.full_scale, noisy_rot=self.noisy_rot,
                                      flip_y=self.flip_y, rot_z=self.rot_z, transl=self.transl)

        # cast to integer
        coords = coords.astype(np.int64)

        # only use voxels inside receptive field
        idxs = (coords.min(1) >= 0) * (coords.max(1) < self.full_scale)

        out_dict['coords'] = coords[idxs]
        out_dict['feats'] = np.ones([idxs.sum(), 1], np.float32)  # simply use 1 as feature; length matches coords[idxs]
        out_dict['seg_label'] = seg_label[idxs]
        out_dict['img_indices'] = out_dict['img_indices'][idxs]

        if self.pselab_data is not None:
            out_dict.update({
                'pseudo_label_2d': self.pselab_data[index]['pseudo_label_2d'][keep_idx][idxs],
                'pseudo_label_3d': self.pselab_data[index]['pseudo_label_3d'][keep_idx][idxs]
            })

        if self.output_orig:
            out_dict.update({
                'orig_seg_label': seg_label,
                'orig_points_idx': idxs,
            })

        return out_dict
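# --- Editor's note (not part of the original file) ---------------------------
# Shapes of the sample returned by SemanticKITTISCN.__getitem__ above, for a
# frame with N points surviving the crop and receptive-field filtering:
#   out_dict['coords']       (N, 3)    int64    voxel coordinates in [0, full_scale)
#   out_dict['feats']        (N, 1)    float32  constant-1 features for SparseConvNet
#   out_dict['img']          (3, H, W) float32  channels-first image
#   out_dict['img_indices']  (N, 2)    int64    (row, col) pixel index of each 3D point
#   out_dict['seg_label']    (N,)      int64    merged label or -100 (ignore)
# plus, if enabled, the pseudo labels and the original labels/point indices.
# ------------------------------------------------------------------------------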
def test_SemanticKITTISCN():
    from xmuda.data.utils.visualize import draw_points_image_labels, draw_bird_eye_view
    preprocess_dir = '/datasets_local/datasets_mjaritz/semantic_kitti_preprocess/preprocess'
    semantic_kitti_dir = '/datasets_local/datasets_mjaritz/semantic_kitti_preprocess'
    # pselab_paths = ("/home/docker_user/workspace/outputs/xmuda/a2d2_semantic_kitti/xmuda_crop_resize/pselab_data/train.npy",)
    # split = ('train',)
    split = ('val',)
    dataset = SemanticKITTISCN(split=split,
                               preprocess_dir=preprocess_dir,
                               semantic_kitti_dir=semantic_kitti_dir,
                               # pselab_paths=pselab_paths,
                               merge_classes=True,
                               noisy_rot=0.1,
                               flip_y=0.5,
                               rot_z=2*np.pi,
                               transl=True,
                               bottom_crop=(480, 302),
                               fliplr=0.5,
                               color_jitter=(0.4, 0.4, 0.4)
                               )
    for i in [10, 20, 30, 40, 50, 60]:
        data = dataset[i]
        coords = data['coords']
        seg_label = data['seg_label']
        img = np.moveaxis(data['img'], 0, 2)
        img_indices = data['img_indices']
        # pseudo_label_2d = data['pseudo_label_2d']
        draw_points_image_labels(img, img_indices, seg_label, color_palette_type='SemanticKITTI', point_size=1)
        # draw_points_image_labels(img, img_indices, pseudo_label_2d, color_palette_type='SemanticKITTI', point_size=1)
        # assert len(pseudo_label_2d) == len(seg_label)
        draw_bird_eye_view(coords)


def compute_class_weights():
    preprocess_dir = '/datasets_local/datasets_mjaritz/semantic_kitti_preprocess/preprocess'
    split = ('train',)
    dataset = SemanticKITTIBase(split,
                                preprocess_dir,
                                merge_classes=True
                                )
    # compute points per class over whole dataset
    num_classes = len(dataset.class_names)
    points_per_class = np.zeros(num_classes, int)
    for i, data in enumerate(dataset.data):
        print('{}/{}'.format(i, len(dataset)))
        labels = dataset.label_mapping[data['seg_labels']]
        points_per_class += np.bincount(labels[labels != -100], minlength=num_classes)

    # compute log smoothed class weights
    class_weights = np.log(5 * points_per_class.sum() / points_per_class)
    print('log smoothed class weights: ', class_weights / class_weights.min())


if __name__ == '__main__':
    test_SemanticKITTISCN()
    # compute_class_weights()
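As a worked example (editor's addition, not from the repository) of the log-smoothed class weighting in compute_class_weights() above: the raw imbalance between classes is strongly compressed by the logarithm, so rare classes are upweighted, but far less than proportionally.

import numpy as np

points_per_class = np.array([90, 10])                      # hypothetical two-class histogram
w = np.log(5 * points_per_class.sum() / points_per_class)  # [log(5.56), log(50)] = [1.71, 3.91]
print(w / w.min())                                         # [1.   2.28] instead of the raw 9x ratio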
================================================
FILE: xmuda/data/semantic_kitti/splits.py
================================================
# official split defined in https://github.com/PRBonn/semantic-kitti-api/blob/master/config/semantic-kitti.yaml
train = [
    '00', '01', '02', '03', '04', '05', '06', '09', '10',
]

val = [
    '07'
]

test = [
    '08'
]

# not used
hidden_test = [
    '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21',
]


================================================
FILE: xmuda/data/utils/augmentation_3d.py
================================================
import numpy as np


def augment_and_scale_3d(points, scale, full_scale,
                         noisy_rot=0.0,
                         flip_x=0.0,
                         flip_y=0.0,
                         rot_z=0.0,
                         transl=False):
    """
    3D point cloud augmentation and scaling from points (in meters) to voxels
    :param points: 3D points in meters
    :param scale: voxel scale in 1 / m, e.g. 20 corresponds to 5cm voxels
    :param full_scale: size of the receptive field of SparseConvNet
    :param noisy_rot: scale of random noise added to all elements of a rotation matrix
    :param flip_x: probability of flipping the x-axis (left-right in nuScenes LiDAR coordinate system)
    :param flip_y: probability of flipping the y-axis (left-right in Kitti LiDAR coordinate system)
    :param rot_z: angle in rad around the z-axis (up-axis)
    :param transl: True or False, random translation inside the receptive field of the SCN, defined by full_scale
    :return coords: the coordinates that are given as input to SparseConvNet
    """
    if noisy_rot > 0 or flip_x > 0 or flip_y > 0 or rot_z > 0:
        rot_matrix = np.eye(3, dtype=np.float32)
        if noisy_rot > 0:
            # add noise to rotation matrix
            rot_matrix += np.random.randn(3, 3) * noisy_rot
        if flip_x > 0:
            # flip x axis: multiply element at (0, 0) with 1 or -1
            rot_matrix[0][0] *= np.random.randint(0, 2) * 2 - 1
        if flip_y > 0:
            # flip y axis: multiply element at (1, 1) with 1 or -1
            rot_matrix[1][1] *= np.random.randint(0, 2) * 2 - 1
        if rot_z > 0:
            # rotate around z-axis (up-axis)
            theta = np.random.rand() * rot_z
            z_rot_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
                                     [np.sin(theta), np.cos(theta), 0],
                                     [0, 0, 1]], dtype=np.float32)
            rot_matrix = rot_matrix.dot(z_rot_matrix)
        points = points.dot(rot_matrix)

    # scale with inverse voxel size (e.g. 20 corresponds to 5cm)
    coords = points * scale
    # translate points to positive octant (receptive field of SCN in x, y, z coords is in interval [0, full_scale])
    coords -= coords.min(0)

    if transl:
        # random translation inside receptive field of SCN
        offset = np.clip(full_scale - coords.max(0) - 0.001, a_min=0, a_max=None) * np.random.rand(3)
        coords += offset

    return coords
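For reference, a minimal usage sketch (editor's addition, not from the repository) of augment_and_scale_3d, mirroring how the dataloaders above call it: augment and voxelize the points, then keep only the voxels that fall inside the receptive field.

import numpy as np
from xmuda.data.utils.augmentation_3d import augment_and_scale_3d

points = (np.random.randn(1000, 3) * 20.).astype(np.float32)  # fake lidar points in meters
coords = augment_and_scale_3d(points, scale=20, full_scale=4096,
                              noisy_rot=0.1, flip_y=0.5, rot_z=2 * np.pi, transl=True)
coords = coords.astype(np.int64)
# discard voxels outside the receptive field, as in SemanticKITTISCN.__getitem__
idxs = (coords.min(1) >= 0) * (coords.max(1) < 4096)
print(coords[idxs].shape)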
================================================
FILE: xmuda/data/utils/evaluate.py
================================================
import numpy as np
from sklearn.metrics import confusion_matrix as CM


class Evaluator(object):
    def __init__(self, class_names, labels=None):
        self.class_names = tuple(class_names)
        self.num_classes = len(class_names)
        self.labels = np.arange(self.num_classes) if labels is None else np.array(labels)
        assert self.labels.shape[0] == self.num_classes
        self.confusion_matrix = np.zeros((self.num_classes, self.num_classes))

    def update(self, pred_label, gt_label):
        """Update per instance

        Args:
            pred_label (np.ndarray): (num_points,)
            gt_label (np.ndarray): (num_points,)
        """
        # convert ignore_label to num_classes
        # refer to sklearn.metrics.confusion_matrix
        gt_label[gt_label == -100] = self.num_classes
        confusion_matrix = CM(gt_label.flatten(),
                              pred_label.flatten(),
                              labels=self.labels)
        self.confusion_matrix += confusion_matrix

    def batch_update(self, pred_labels, gt_labels):
        assert len(pred_labels) == len(gt_labels)
        for pred_label, gt_label in zip(pred_labels, gt_labels):
            self.update(pred_label, gt_label)

    @property
    def overall_acc(self):
        return np.sum(np.diag(self.confusion_matrix)) / np.sum(self.confusion_matrix)

    @property
    def overall_iou(self):
        class_iou = np.array(self.class_iou.copy())
        class_iou[np.isnan(class_iou)] = 0
        return np.mean(class_iou)

    @property
    def class_seg_acc(self):
        return [self.confusion_matrix[i, i] / np.sum(self.confusion_matrix[i])
                for i in range(self.num_classes)]

    @property
    def class_iou(self):
        iou_list = []
        for i in range(self.num_classes):
            tp = self.confusion_matrix[i, i]
            p = self.confusion_matrix[:, i].sum()
            g = self.confusion_matrix[i, :].sum()
            union = p + g - tp
            if union == 0:
                iou = float('nan')
            else:
                iou = tp / union
            iou_list.append(iou)
        return iou_list

    def print_table(self):
        from tabulate import tabulate
        header = ['Class', 'Accuracy', 'IOU', 'Total']
        seg_acc_per_class = self.class_seg_acc
        iou_per_class = self.class_iou

        table = []
        for ind, class_name in enumerate(self.class_names):
            table.append([class_name,
                          seg_acc_per_class[ind] * 100,
                          iou_per_class[ind] * 100,
                          int(self.confusion_matrix[ind].sum()),
                          ])
        return tabulate(table, headers=header, tablefmt='psql', floatfmt='.2f')

    def save_table(self, filename):
        from tabulate import tabulate
        header = ('overall acc', 'overall iou') + self.class_names
        table = [[self.overall_acc, self.overall_iou] + self.class_iou]
        with open(filename, 'w') as f:
            # In order to unify format, remove all the alignments.
            f.write(tabulate(table, headers=header, tablefmt='tsv',
                             floatfmt='.5f', numalign=None, stralign=None))


================================================
FILE: xmuda/data/utils/refine_pseudo_labels.py
================================================
import torch


def refine_pseudo_labels(probs, pseudo_label, ignore_label=-100):
    """
    Reference: https://github.com/liyunsheng13/BDL/blob/master/SSL.py
    Per class, set the less confident half of labels to ignore label.
:param probs: maximum probabilities (N,), where N is the number of 3D points :param pseudo_label: predicted label which had maximum probability (N,) :param ignore_label: :return: """ probs, pseudo_label = torch.tensor(probs), torch.tensor(pseudo_label) for cls_idx in pseudo_label.unique(): curr_idx = pseudo_label == cls_idx curr_idx = curr_idx.nonzero().squeeze(1) thresh = probs[curr_idx].median() thresh = min(thresh, 0.9) ignore_idx = curr_idx[probs[curr_idx] < thresh] pseudo_label[ignore_idx] = ignore_label return pseudo_label.numpy() ================================================ FILE: xmuda/data/utils/turbo_cmap.py ================================================ # Reference: https://gist.github.com/mikhailov-work/ee72ba4191942acecc03fe6da94fc73f # Copyright 2019 Google LLC. # SPDX-License-Identifier: Apache-2.0 # Author: Anton Mikhailov turbo_colormap_data = [[0.18995,0.07176,0.23217],[0.19483,0.08339,0.26149],[0.19956,0.09498,0.29024],[0.20415,0.10652,0.31844],[0.20860,0.11802,0.34607],[0.21291,0.12947,0.37314],[0.21708,0.14087,0.39964],[0.22111,0.15223,0.42558],[0.22500,0.16354,0.45096],[0.22875,0.17481,0.47578],[0.23236,0.18603,0.50004],[0.23582,0.19720,0.52373],[0.23915,0.20833,0.54686],[0.24234,0.21941,0.56942],[0.24539,0.23044,0.59142],[0.24830,0.24143,0.61286],[0.25107,0.25237,0.63374],[0.25369,0.26327,0.65406],[0.25618,0.27412,0.67381],[0.25853,0.28492,0.69300],[0.26074,0.29568,0.71162],[0.26280,0.30639,0.72968],[0.26473,0.31706,0.74718],[0.26652,0.32768,0.76412],[0.26816,0.33825,0.78050],[0.26967,0.34878,0.79631],[0.27103,0.35926,0.81156],[0.27226,0.36970,0.82624],[0.27334,0.38008,0.84037],[0.27429,0.39043,0.85393],[0.27509,0.40072,0.86692],[0.27576,0.41097,0.87936],[0.27628,0.42118,0.89123],[0.27667,0.43134,0.90254],[0.27691,0.44145,0.91328],[0.27701,0.45152,0.92347],[0.27698,0.46153,0.93309],[0.27680,0.47151,0.94214],[0.27648,0.48144,0.95064],[0.27603,0.49132,0.95857],[0.27543,0.50115,0.96594],[0.27469,0.51094,0.97275],[0.27381,0.52069,0.97899],[0.27273,0.53040,0.98461],[0.27106,0.54015,0.98930],[0.26878,0.54995,0.99303],[0.26592,0.55979,0.99583],[0.26252,0.56967,0.99773],[0.25862,0.57958,0.99876],[0.25425,0.58950,0.99896],[0.24946,0.59943,0.99835],[0.24427,0.60937,0.99697],[0.23874,0.61931,0.99485],[0.23288,0.62923,0.99202],[0.22676,0.63913,0.98851],[0.22039,0.64901,0.98436],[0.21382,0.65886,0.97959],[0.20708,0.66866,0.97423],[0.20021,0.67842,0.96833],[0.19326,0.68812,0.96190],[0.18625,0.69775,0.95498],[0.17923,0.70732,0.94761],[0.17223,0.71680,0.93981],[0.16529,0.72620,0.93161],[0.15844,0.73551,0.92305],[0.15173,0.74472,0.91416],[0.14519,0.75381,0.90496],[0.13886,0.76279,0.89550],[0.13278,0.77165,0.88580],[0.12698,0.78037,0.87590],[0.12151,0.78896,0.86581],[0.11639,0.79740,0.85559],[0.11167,0.80569,0.84525],[0.10738,0.81381,0.83484],[0.10357,0.82177,0.82437],[0.10026,0.82955,0.81389],[0.09750,0.83714,0.80342],[0.09532,0.84455,0.79299],[0.09377,0.85175,0.78264],[0.09287,0.85875,0.77240],[0.09267,0.86554,0.76230],[0.09320,0.87211,0.75237],[0.09451,0.87844,0.74265],[0.09662,0.88454,0.73316],[0.09958,0.89040,0.72393],[0.10342,0.89600,0.71500],[0.10815,0.90142,0.70599],[0.11374,0.90673,0.69651],[0.12014,0.91193,0.68660],[0.12733,0.91701,0.67627],[0.13526,0.92197,0.66556],[0.14391,0.92680,0.65448],[0.15323,0.93151,0.64308],[0.16319,0.93609,0.63137],[0.17377,0.94053,0.61938],[0.18491,0.94484,0.60713],[0.19659,0.94901,0.59466],[0.20877,0.95304,0.58199],[0.22142,0.95692,0.56914],[0.23449,0.96065,0.55614],[0.24797,0.96423,0.54303],[0.26180,0.96765,0.52981],[0.27597,0.97092,0.51653
],[0.29042,0.97403,0.50321],[0.30513,0.97697,0.48987],[0.32006,0.97974,0.47654],[0.33517,0.98234,0.46325],[0.35043,0.98477,0.45002],[0.36581,0.98702,0.43688],[0.38127,0.98909,0.42386],[0.39678,0.99098,0.41098],[0.41229,0.99268,0.39826],[0.42778,0.99419,0.38575],[0.44321,0.99551,0.37345],[0.45854,0.99663,0.36140],[0.47375,0.99755,0.34963],[0.48879,0.99828,0.33816],[0.50362,0.99879,0.32701],[0.51822,0.99910,0.31622],[0.53255,0.99919,0.30581],[0.54658,0.99907,0.29581],[0.56026,0.99873,0.28623],[0.57357,0.99817,0.27712],[0.58646,0.99739,0.26849],[0.59891,0.99638,0.26038],[0.61088,0.99514,0.25280],[0.62233,0.99366,0.24579],[0.63323,0.99195,0.23937],[0.64362,0.98999,0.23356],[0.65394,0.98775,0.22835],[0.66428,0.98524,0.22370],[0.67462,0.98246,0.21960],[0.68494,0.97941,0.21602],[0.69525,0.97610,0.21294],[0.70553,0.97255,0.21032],[0.71577,0.96875,0.20815],[0.72596,0.96470,0.20640],[0.73610,0.96043,0.20504],[0.74617,0.95593,0.20406],[0.75617,0.95121,0.20343],[0.76608,0.94627,0.20311],[0.77591,0.94113,0.20310],[0.78563,0.93579,0.20336],[0.79524,0.93025,0.20386],[0.80473,0.92452,0.20459],[0.81410,0.91861,0.20552],[0.82333,0.91253,0.20663],[0.83241,0.90627,0.20788],[0.84133,0.89986,0.20926],[0.85010,0.89328,0.21074],[0.85868,0.88655,0.21230],[0.86709,0.87968,0.21391],[0.87530,0.87267,0.21555],[0.88331,0.86553,0.21719],[0.89112,0.85826,0.21880],[0.89870,0.85087,0.22038],[0.90605,0.84337,0.22188],[0.91317,0.83576,0.22328],[0.92004,0.82806,0.22456],[0.92666,0.82025,0.22570],[0.93301,0.81236,0.22667],[0.93909,0.80439,0.22744],[0.94489,0.79634,0.22800],[0.95039,0.78823,0.22831],[0.95560,0.78005,0.22836],[0.96049,0.77181,0.22811],[0.96507,0.76352,0.22754],[0.96931,0.75519,0.22663],[0.97323,0.74682,0.22536],[0.97679,0.73842,0.22369],[0.98000,0.73000,0.22161],[0.98289,0.72140,0.21918],[0.98549,0.71250,0.21650],[0.98781,0.70330,0.21358],[0.98986,0.69382,0.21043],[0.99163,0.68408,0.20706],[0.99314,0.67408,0.20348],[0.99438,0.66386,0.19971],[0.99535,0.65341,0.19577],[0.99607,0.64277,0.19165],[0.99654,0.63193,0.18738],[0.99675,0.62093,0.18297],[0.99672,0.60977,0.17842],[0.99644,0.59846,0.17376],[0.99593,0.58703,0.16899],[0.99517,0.57549,0.16412],[0.99419,0.56386,0.15918],[0.99297,0.55214,0.15417],[0.99153,0.54036,0.14910],[0.98987,0.52854,0.14398],[0.98799,0.51667,0.13883],[0.98590,0.50479,0.13367],[0.98360,0.49291,0.12849],[0.98108,0.48104,0.12332],[0.97837,0.46920,0.11817],[0.97545,0.45740,0.11305],[0.97234,0.44565,0.10797],[0.96904,0.43399,0.10294],[0.96555,0.42241,0.09798],[0.96187,0.41093,0.09310],[0.95801,0.39958,0.08831],[0.95398,0.38836,0.08362],[0.94977,0.37729,0.07905],[0.94538,0.36638,0.07461],[0.94084,0.35566,0.07031],[0.93612,0.34513,0.06616],[0.93125,0.33482,0.06218],[0.92623,0.32473,0.05837],[0.92105,0.31489,0.05475],[0.91572,0.30530,0.05134],[0.91024,0.29599,0.04814],[0.90463,0.28696,0.04516],[0.89888,0.27824,0.04243],[0.89298,0.26981,0.03993],[0.88691,0.26152,0.03753],[0.88066,0.25334,0.03521],[0.87422,0.24526,0.03297],[0.86760,0.23730,0.03082],[0.86079,0.22945,0.02875],[0.85380,0.22170,0.02677],[0.84662,0.21407,0.02487],[0.83926,0.20654,0.02305],[0.83172,0.19912,0.02131],[0.82399,0.19182,0.01966],[0.81608,0.18462,0.01809],[0.80799,0.17753,0.01660],[0.79971,0.17055,0.01520],[0.79125,0.16368,0.01387],[0.78260,0.15693,0.01264],[0.77377,0.15028,0.01148],[0.76476,0.14374,0.01041],[0.75556,0.13731,0.00942],[0.74617,0.13098,0.00851],[0.73661,0.12477,0.00769],[0.72686,0.11867,0.00695],[0.71692,0.11268,0.00629],[0.70680,0.10680,0.00571],[0.69650,0.10102,0.00522],[0.68602,0.09536,0.00481],[0.67535,0.08980,
0.00449],[0.66449,0.08436,0.00424],[0.65345,0.07902,0.00408],[0.64223,0.07380,0.00401],[0.63082,0.06868,0.00401],[0.61923,0.06367,0.00410],[0.60746,0.05878,0.00427],[0.59550,0.05399,0.00453],[0.58336,0.04931,0.00486],[0.57103,0.04474,0.00529],[0.55852,0.04028,0.00579],[0.54583,0.03593,0.00638],[0.53295,0.03169,0.00705],[0.51989,0.02756,0.00780],[0.50664,0.02354,0.00863],[0.49321,0.01963,0.00955],[0.47960,0.01583,0.01055]] # The look-up table contains 256 entries. Each entry is a floating point sRGB triplet. # To use it with matplotlib, pass cmap=ListedColormap(turbo_colormap_data) as an arg to imshow() (don't forget "from matplotlib.colors import ListedColormap"). # If you have a typical 8-bit greyscale image, you can use the 8-bit value to index into this LUT directly. # The floating point color values can be converted to 8-bit sRGB via multiplying by 255 and casting/flooring to an integer. Saturation should not be required for IEEE-754 compliant arithmetic. # If you have a floating point value in the range [0,1], you can use interpolate() to linearly interpolate between the entries. # If you have 16-bit or 32-bit integer values, convert them to floating point values on the [0,1] range and then use interpolate(). Doing the interpolation in floating point will reduce banding. # If some of your values may lie outside the [0,1] range, use interpolate_or_clip() to highlight them. def interpolate(colormap, x): x = max(0.0, min(1.0, x)) a = int(x*255.0) b = min(255, a + 1) f = x*255.0 - a return [colormap[a][0] + (colormap[b][0] - colormap[a][0]) * f, colormap[a][1] + (colormap[b][1] - colormap[a][1]) * f, colormap[a][2] + (colormap[b][2] - colormap[a][2]) * f] def interpolate_or_clip(colormap, x): if x < 0.0: return [0.0, 0.0, 0.0] elif x > 1.0: return [1.0, 1.0, 1.0] else: return interpolate(colormap, x) ================================================ FILE: xmuda/data/utils/validate.py ================================================ import numpy as np import logging import time import torch import torch.nn.functional as F from xmuda.data.utils.evaluate import Evaluator def validate(cfg, model_2d, model_3d, dataloader, val_metric_logger, pselab_path=None): logger = logging.getLogger('xmuda.validate') logger.info('Validation') # evaluator class_names = dataloader.dataset.class_names evaluator_2d = Evaluator(class_names) evaluator_3d = Evaluator(class_names) if model_3d else None evaluator_ensemble = Evaluator(class_names) if model_3d else None pselab_data_list = [] end = time.time() with torch.no_grad(): for iteration, data_batch in enumerate(dataloader): data_time = time.time() - end # copy data from cpu to gpu if 'SCN' in cfg.DATASET_TARGET.TYPE: data_batch['x'][1] = data_batch['x'][1].cuda() data_batch['seg_label'] = data_batch['seg_label'].cuda() data_batch['img'] = data_batch['img'].cuda() else: raise NotImplementedError # predict preds_2d = model_2d(data_batch) preds_3d = model_3d(data_batch) if model_3d else None pred_label_voxel_2d = preds_2d['seg_logit'].argmax(1).cpu().numpy() pred_label_voxel_3d = preds_3d['seg_logit'].argmax(1).cpu().numpy() if model_3d else None # softmax average (ensembling) probs_2d = F.softmax(preds_2d['seg_logit'], dim=1) probs_3d = F.softmax(preds_3d['seg_logit'], dim=1) if model_3d else None pred_label_voxel_ensemble = (probs_2d + probs_3d).argmax(1).cpu().numpy() if model_3d else None # get original point cloud from before voxelization seg_label = data_batch['orig_seg_label'] points_idx = data_batch['orig_points_idx'] # loop over batch left_idx = 0 for 
batch_ind in range(len(seg_label)): curr_points_idx = points_idx[batch_ind] # check if all points have predictions (= all voxels inside receptive field) assert np.all(curr_points_idx) curr_seg_label = seg_label[batch_ind] right_idx = left_idx + curr_points_idx.sum() pred_label_2d = pred_label_voxel_2d[left_idx:right_idx] pred_label_3d = pred_label_voxel_3d[left_idx:right_idx] if model_3d else None pred_label_ensemble = pred_label_voxel_ensemble[left_idx:right_idx] if model_3d else None # evaluate evaluator_2d.update(pred_label_2d, curr_seg_label) if model_3d: evaluator_3d.update(pred_label_3d, curr_seg_label) evaluator_ensemble.update(pred_label_ensemble, curr_seg_label) if pselab_path is not None: assert np.all(pred_label_2d >= 0) curr_probs_2d = probs_2d[left_idx:right_idx] curr_probs_3d = probs_3d[left_idx:right_idx] if model_3d else None pselab_data_list.append({ 'probs_2d': curr_probs_2d[range(len(pred_label_2d)), pred_label_2d].cpu().numpy(), 'pseudo_label_2d': pred_label_2d.astype(np.uint8), 'probs_3d': curr_probs_3d[range(len(pred_label_3d)), pred_label_3d].cpu().numpy() if model_3d else None, 'pseudo_label_3d': pred_label_3d.astype(np.uint8) if model_3d else None }) left_idx = right_idx seg_loss_2d = F.cross_entropy(preds_2d['seg_logit'], data_batch['seg_label']) seg_loss_3d = F.cross_entropy(preds_3d['seg_logit'], data_batch['seg_label']) if model_3d else None val_metric_logger.update(seg_loss_2d=seg_loss_2d) if seg_loss_3d is not None: val_metric_logger.update(seg_loss_3d=seg_loss_3d) batch_time = time.time() - end val_metric_logger.update(time=batch_time, data=data_time) end = time.time() # log cur_iter = iteration + 1 if cur_iter == 1 or (cfg.VAL.LOG_PERIOD > 0 and cur_iter % cfg.VAL.LOG_PERIOD == 0): logger.info( val_metric_logger.delimiter.join( [ 'iter: {iter}/{total_iter}', '{meters}', 'max mem: {memory:.0f}', ] ).format( iter=cur_iter, total_iter=len(dataloader), meters=str(val_metric_logger), memory=torch.cuda.max_memory_allocated() / (1024.0 ** 2), ) ) val_metric_logger.update(seg_iou_2d=evaluator_2d.overall_iou) if evaluator_3d is not None: val_metric_logger.update(seg_iou_3d=evaluator_3d.overall_iou) eval_list = [('2D', evaluator_2d)] if model_3d: eval_list.extend([('3D', evaluator_3d), ('2D+3D', evaluator_ensemble)]) for modality, evaluator in eval_list: logger.info('{} overall accuracy={:.2f}%'.format(modality, 100.0 * evaluator.overall_acc)) logger.info('{} overall IOU={:.2f}'.format(modality, 100.0 * evaluator.overall_iou)) logger.info('{} class-wise segmentation accuracy and IoU.\n{}'.format(modality, evaluator.print_table())) if pselab_path is not None: np.save(pselab_path, pselab_data_list) logger.info('Saved pseudo label data to {}'.format(pselab_path)) ================================================ FILE: xmuda/data/utils/visualize.py ================================================ import matplotlib.pyplot as plt import numpy as np from xmuda.data.utils.turbo_cmap import interpolate_or_clip, turbo_colormap_data # all classes NUSCENES_COLOR_PALETTE = [ (255, 158, 0), # car (255, 158, 0), # truck (255, 158, 0), # bus (255, 158, 0), # trailer (255, 158, 0), # construction_vehicle (0, 0, 230), # pedestrian (255, 61, 99), # motorcycle (255, 61, 99), # bicycle (0, 0, 0), # traffic_cone (0, 0, 0), # barrier (200, 200, 200), # background ] # classes after merging (as used in xMUDA) NUSCENES_COLOR_PALETTE_SHORT = [ (255, 158, 0), # vehicle (0, 0, 230), # pedestrian (255, 61, 99), # bike (0, 0, 0), # traffic boundary (200, 200, 200), # background ] # all classes 
A2D2_COLOR_PALETTE_SHORT = [ (255, 0, 0), # car (255, 128, 0), # truck (182, 89, 6), # bike (204, 153, 255), # person (255, 0, 255), # road (150, 150, 200), # parking (180, 150, 200), # sidewalk (241, 230, 255), # building (147, 253, 194), # nature (255, 246, 143), # other-objects (0, 0, 0) # ignore ] # colors as defined in https://github.com/PRBonn/semantic-kitti-api/blob/master/config/semantic-kitti.yaml SEMANTIC_KITTI_ID_TO_BGR = { # bgr 0: [0, 0, 0], 1: [0, 0, 255], 10: [245, 150, 100], 11: [245, 230, 100], 13: [250, 80, 100], 15: [150, 60, 30], 16: [255, 0, 0], 18: [180, 30, 80], 20: [255, 0, 0], 30: [30, 30, 255], 31: [200, 40, 255], 32: [90, 30, 150], 40: [255, 0, 255], 44: [255, 150, 255], 48: [75, 0, 75], 49: [75, 0, 175], 50: [0, 200, 255], 51: [50, 120, 255], 52: [0, 150, 255], 60: [170, 255, 150], 70: [0, 175, 0], 71: [0, 60, 135], 72: [80, 240, 150], 80: [150, 240, 255], 81: [0, 0, 255], 99: [255, 255, 50], 252: [245, 150, 100], 256: [255, 0, 0], 253: [200, 40, 255], 254: [30, 30, 255], 255: [90, 30, 150], 257: [250, 80, 100], 258: [180, 30, 80], 259: [255, 0, 0], } SEMANTIC_KITTI_COLOR_PALETTE = [SEMANTIC_KITTI_ID_TO_BGR[id] if id in SEMANTIC_KITTI_ID_TO_BGR.keys() else [0, 0, 0] for id in range(list(SEMANTIC_KITTI_ID_TO_BGR.keys())[-1] + 1)] # classes after merging (as used in xMUDA) SEMANTIC_KITTI_COLOR_PALETTE_SHORT_BGR = [ [245, 150, 100], # car [180, 30, 80], # truck [150, 60, 30], # bike [30, 30, 255], # person [255, 0, 255], # road [255, 150, 255], # parking [75, 0, 75], # sidewalk [0, 200, 255], # building [0, 175, 0], # nature [255, 255, 50], # other-objects [0, 0, 0], # ignore ] SEMANTIC_KITTI_COLOR_PALETTE_SHORT = [(c[2], c[1], c[0]) for c in SEMANTIC_KITTI_COLOR_PALETTE_SHORT_BGR] def draw_points_image_labels(img, img_indices, seg_labels, show=True, color_palette_type='NuScenes', point_size=0.5): if color_palette_type == 'NuScenes': color_palette = NUSCENES_COLOR_PALETTE_SHORT elif color_palette_type == 'A2D2': color_palette = A2D2_COLOR_PALETTE_SHORT elif color_palette_type == 'SemanticKITTI': color_palette = SEMANTIC_KITTI_COLOR_PALETTE_SHORT elif color_palette_type == 'SemanticKITTI_long': color_palette = SEMANTIC_KITTI_COLOR_PALETTE else: raise NotImplementedError('Color palette type not supported') color_palette = np.array(color_palette) / 255. seg_labels[seg_labels == -100] = len(color_palette) - 1 colors = color_palette[seg_labels] plt.imshow(img) plt.scatter(img_indices[:, 1], img_indices[:, 0], c=colors, alpha=0.5, s=point_size) plt.axis('off') if show: plt.show() def normalize_depth(depth, d_min, d_max): # normalize linearly between d_min and d_max data = np.clip(depth, d_min, d_max) return (data - d_min) / (d_max - d_min) def draw_points_image_depth(img, img_indices, depth, show=True, point_size=0.5): # depth = normalize_depth(depth, d_min=3., d_max=50.) 
    depth = normalize_depth(depth, d_min=depth.min(), d_max=depth.max())
    colors = []
    for depth_val in depth:
        colors.append(interpolate_or_clip(colormap=turbo_colormap_data, x=depth_val))
    # ax5.imshow(np.full_like(img, 255))
    plt.imshow(img)
    plt.scatter(img_indices[:, 1], img_indices[:, 0], c=colors, alpha=0.5, s=point_size)

    plt.axis('off')

    if show:
        plt.show()


def draw_bird_eye_view(coords, full_scale=4096):
    plt.scatter(coords[:, 0], coords[:, 1], s=0.1)
    plt.xlim([0, full_scale])
    plt.ylim([0, full_scale])
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()


================================================
FILE: xmuda/models/build.py
================================================
from xmuda.models.xmuda_arch import Net2DSeg, Net3DSeg
from xmuda.models.metric import SegIoU


def build_model_2d(cfg):
    model = Net2DSeg(num_classes=cfg.MODEL_2D.NUM_CLASSES,
                     backbone_2d=cfg.MODEL_2D.TYPE,
                     backbone_2d_kwargs=cfg.MODEL_2D[cfg.MODEL_2D.TYPE],
                     dual_head=cfg.MODEL_2D.DUAL_HEAD
                     )
    train_metric = SegIoU(cfg.MODEL_2D.NUM_CLASSES, name='seg_iou_2d')
    return model, train_metric


def build_model_3d(cfg):
    model = Net3DSeg(num_classes=cfg.MODEL_3D.NUM_CLASSES,
                     backbone_3d=cfg.MODEL_3D.TYPE,
                     backbone_3d_kwargs=cfg.MODEL_3D[cfg.MODEL_3D.TYPE],
                     dual_head=cfg.MODEL_3D.DUAL_HEAD
                     )
    train_metric = SegIoU(cfg.MODEL_3D.NUM_CLASSES, name='seg_iou_3d')
    return model, train_metric


================================================
FILE: xmuda/models/losses.py
================================================
import numpy as np
import torch
import logging


def entropy_loss(v):
    """
    Entropy loss for probabilistic prediction vectors
    input: batch_size x classes x points
    output: scalar, the entropy normalized by log2(num_classes) and averaged over batch and points
    """
    # (num points, num classes)
    if v.dim() == 2:
        v = v.transpose(0, 1)
        v = v.unsqueeze(0)
    # (1, num_classes, num_points)
    assert v.dim() == 3
    n, c, p = v.size()
    return -torch.sum(torch.mul(v, torch.log2(v + 1e-30))) / (n * p * np.log2(c))


def logcoral_loss(x_src, x_trg):
    """
    Geodesic loss (log coral loss), reference:
    https://github.com/pmorerio/minimal-entropy-correlation-alignment/blob/master/svhn2mnist/model.py
    :param x_src: source features of size (N, ..., F), where N is the batch size and F is the feature size
    :param x_trg: target features of size (N, ..., F), where N is the batch size and F is the feature size
    :return: geodesic distance between the x_src and x_trg
    """
    # check if the feature size is the same, so that the covariance matrices will have the same dimensions
    assert x_src.shape[-1] == x_trg.shape[-1]
    assert x_src.dim() >= 2
    batch_size = x_src.shape[0]
    if x_src.dim() > 2:
        # reshape from (N1, N2, ..., NM, F) to (N1 * N2 * ... * NM, F)
        x_src = x_src.flatten(end_dim=-2)
        x_trg = x_trg.flatten(end_dim=-2)

    # subtract the mean over the batch
    x_src = x_src - torch.mean(x_src, 0)
    x_trg = x_trg - torch.mean(x_trg, 0)

    # compute covariance
    factor = 1. / (batch_size - 1)
    cov_src = factor * torch.mm(x_src.t(), x_src)
    cov_trg = factor * torch.mm(x_trg.t(), x_trg)

    # dirty workaround to prevent GPU memory error due to MAGMA (used in SVD)
    # this implementation achieves loss of zero without creating a fork in the computation graph
    # if there is a nan or big number in the cov matrix, use where (not if!)
to set cov matrix to identity matrix condition = (cov_src > 1e30).any() or (cov_trg > 1e30).any() or torch.isnan(cov_src).any() or torch.isnan(cov_trg).any() cov_src = torch.where(torch.full_like(cov_src, condition, dtype=torch.uint8), torch.eye(cov_src.shape[0], device=cov_src.device), cov_src) cov_trg = torch.where(torch.full_like(cov_trg, condition, dtype=torch.uint8), torch.eye(cov_trg.shape[0], device=cov_trg.device), cov_trg) if condition: logger = logging.getLogger('xmuda.train') logger.info('Big number > 1e30 or nan in covariance matrix, return loss of 0 to prevent error in SVD decomposition.') _, e_src, v_src = cov_src.svd() _, e_trg, v_trg = cov_trg.svd() # nan can occur when taking log of a value near 0 (problem occurs if the cov matrix is of low rank) log_cov_src = torch.mm(v_src, torch.mm(torch.diag(torch.log(e_src)), v_src.t())) log_cov_trg = torch.mm(v_trg, torch.mm(torch.diag(torch.log(e_trg)), v_trg.t())) # Frobenius norm return torch.mean((log_cov_src - log_cov_trg) ** 2) ================================================ FILE: xmuda/models/metric.py ================================================ import torch from xmuda.common.utils.metric_logger import AverageMeter class SegAccuracy(AverageMeter): """Segmentation accuracy""" name = 'seg_acc' def __init__(self, ignore_index=-100): super(SegAccuracy, self).__init__() self.ignore_index = ignore_index def update_dict(self, preds, labels): seg_logit = preds['seg_logit'] # (b, c, n) seg_label = labels['seg_label'] # (b, n) pred_label = seg_logit.argmax(1) mask = (seg_label != self.ignore_index) seg_label = seg_label[mask] pred_label = pred_label[mask] tp_mask = pred_label.eq(seg_label) # (b, n) self.update(tp_mask.sum().item(), tp_mask.numel()) class SegIoU(object): """Segmentation IoU References: https://github.com/pytorch/vision/blob/master/references/segmentation/utils.py """ def __init__(self, num_classes, ignore_index=-100, name='seg_iou'): self.num_classes = num_classes self.ignore_index = ignore_index self.mat = None self.name = name def update_dict(self, preds, labels): seg_logit = preds['seg_logit'] # (batch_size, num_classes, num_points) seg_label = labels['seg_label'] # (batch_size, num_points) pred_label = seg_logit.argmax(1) mask = (seg_label != self.ignore_index) seg_label = seg_label[mask] pred_label = pred_label[mask] # Update confusion matrix # TODO: Compare the speed between torch.histogram and torch.bincount after pytorch v1.1.0 n = self.num_classes with torch.no_grad(): if self.mat is None: self.mat = seg_label.new_zeros((n, n)) inds = n * seg_label + pred_label self.mat += torch.bincount(inds, minlength=n ** 2).reshape(n, n) def reset(self): self.mat = None @property def iou(self): h = self.mat.float() iou = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) return iou @property def global_avg(self): return self.iou.mean().item() @property def avg(self): return self.global_avg def __str__(self): return '{iou:.4f}'.format(iou=self.iou.mean().item()) @property def summary_str(self): return str(self) ================================================ FILE: xmuda/models/resnet34_unet.py ================================================ """UNet based on ResNet34""" import torch import torch.nn as nn import torch.nn.functional as F from torchvision.models.resnet import resnet34 class UNetResNet34(nn.Module): def __init__(self, pretrained=True): super(UNetResNet34, self).__init__() # ----------------------------------------------------------------------------- # # Encoder # 
----------------------------------------------------------------------------- # net = resnet34(pretrained) # Note that we do not downsample for conv1 # self.conv1 = net.conv1 self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False) self.conv1.weight.data = net.conv1.weight.data self.bn1 = net.bn1 self.relu = net.relu self.maxpool = net.maxpool self.layer1 = net.layer1 self.layer2 = net.layer2 self.layer3 = net.layer3 self.layer4 = net.layer4 # ----------------------------------------------------------------------------- # # Decoder # ----------------------------------------------------------------------------- # _, self.dec_t_conv_stage5 = self.dec_stage(self.layer4, num_concat=1) self.dec_conv_stage4, self.dec_t_conv_stage4 = self.dec_stage(self.layer3, num_concat=2) self.dec_conv_stage3, self.dec_t_conv_stage3 = self.dec_stage(self.layer2, num_concat=2) self.dec_conv_stage2, self.dec_t_conv_stage2 = self.dec_stage(self.layer1, num_concat=2) self.dec_conv_stage1 = nn.Conv2d(2 * 64, 64, kernel_size=3, padding=1) # dropout self.dropout = nn.Dropout(p=0.4) @staticmethod def dec_stage(enc_stage, num_concat): in_channels = enc_stage[0].conv1.in_channels out_channels = enc_stage[-1].conv2.out_channels conv = nn.Sequential( nn.Conv2d(num_concat * out_channels, out_channels, kernel_size=3, padding=1), nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True), ) t_conv = nn.Sequential( nn.ConvTranspose2d(out_channels, in_channels, kernel_size=2, stride=2), nn.BatchNorm2d(in_channels), nn.ReLU(inplace=True) ) return conv, t_conv def forward(self, x): # pad input to be divisible by 16 = 2 ** 4 h, w = x.shape[2], x.shape[3] min_size = 16 pad_h = int((h + min_size - 1) / min_size) * min_size - h pad_w = int((w + min_size - 1) / min_size) * min_size - w if pad_h > 0 or pad_w > 0: x = F.pad(x, [0, pad_w, 0, pad_h]) # ----------------------------------------------------------------------------- # # Encoder # ----------------------------------------------------------------------------- # inter_features = [] x = self.conv1(x) x = self.bn1(x) x = self.relu(x) inter_features.append(x) x = self.maxpool(x) # downsample x = self.layer1(x) inter_features.append(x) x = self.layer2(x) # downsample inter_features.append(x) x = self.layer3(x) # downsample x = self.dropout(x) inter_features.append(x) x = self.layer4(x) # downsample x = self.dropout(x) # ----------------------------------------------------------------------------- # # Decoder # ----------------------------------------------------------------------------- # # upsample x = self.dec_t_conv_stage5(x) x = torch.cat([inter_features[3], x], dim=1) x = self.dec_conv_stage4(x) # upsample x = self.dec_t_conv_stage4(x) x = torch.cat([inter_features[2], x], dim=1) x = self.dec_conv_stage3(x) # upsample x = self.dec_t_conv_stage3(x) x = torch.cat([inter_features[1], x], dim=1) x = self.dec_conv_stage2(x) # upsample x = self.dec_t_conv_stage2(x) x = torch.cat([inter_features[0], x], dim=1) x = self.dec_conv_stage1(x) # crop padding if pad_h > 0 or pad_w > 0: x = x[:, :, 0:h, 0:w] return x def test(): b, c, h, w = 2, 20, 120, 160 image = torch.randn(b, 3, h, w).cuda() net = UNetResNet34(pretrained=True) net.cuda() feats = net(image) print('feats', feats.shape) if __name__ == '__main__': test() ================================================ FILE: xmuda/models/scn_unet.py ================================================ import torch import torch.nn as nn import sparseconvnet as scn DIMENSION = 3 class UNetSCN(nn.Module): def __init__(self, 
in_channels, m=16, # number of unet features (multiplied in each layer) block_reps=1, # depth residual_blocks=False, # ResNet style basic blocks full_scale=4096, num_planes=7 ): super(UNetSCN, self).__init__() self.in_channels = in_channels self.out_channels = m n_planes = [(n + 1) * m for n in range(num_planes)] self.sparseModel = scn.Sequential().add( scn.InputLayer(DIMENSION, full_scale, mode=4)).add( scn.SubmanifoldConvolution(DIMENSION, in_channels, m, 3, False)).add( scn.UNet(DIMENSION, block_reps, n_planes, residual_blocks)).add( scn.BatchNormReLU(m)).add( scn.OutputLayer(DIMENSION)) def forward(self, x): x = self.sparseModel(x) return x def test(): b, n = 2, 100 coords = torch.randint(4096, [b, n, DIMENSION]) batch_idxs = torch.arange(b).reshape(b, 1, 1).repeat(1, n, 1) coords = torch.cat([coords, batch_idxs], 2).reshape(-1, DIMENSION + 1) in_channels = 3 feats = torch.rand(b * n, in_channels) x = [coords, feats.cuda()] net = UNetSCN(in_channels).cuda() out_feats = net(x) print('out_feats', out_feats.shape) if __name__ == '__main__': test() ================================================ FILE: xmuda/models/xmuda_arch.py ================================================ import torch import torch.nn as nn from xmuda.models.resnet34_unet import UNetResNet34 from xmuda.models.scn_unet import UNetSCN class Net2DSeg(nn.Module): def __init__(self, num_classes, dual_head, backbone_2d, backbone_2d_kwargs ): super(Net2DSeg, self).__init__() # 2D image network if backbone_2d == 'UNetResNet34': self.net_2d = UNetResNet34(**backbone_2d_kwargs) feat_channels = 64 else: raise NotImplementedError('2D backbone {} not supported'.format(backbone_2d)) # segmentation head self.linear = nn.Linear(feat_channels, num_classes) # 2nd segmentation head self.dual_head = dual_head if dual_head: self.linear2 = nn.Linear(feat_channels, num_classes) def forward(self, data_batch): # (batch_size, 3, H, W) img = data_batch['img'] img_indices = data_batch['img_indices'] # 2D network x = self.net_2d(img) # 2D-3D feature lifting img_feats = [] for i in range(x.shape[0]): img_feats.append(x.permute(0, 2, 3, 1)[i][img_indices[i][:, 0], img_indices[i][:, 1]]) img_feats = torch.cat(img_feats, 0) # linear x = self.linear(img_feats) preds = { 'feats': img_feats, 'seg_logit': x, } if self.dual_head: preds['seg_logit2'] = self.linear2(img_feats) return preds class Net3DSeg(nn.Module): def __init__(self, num_classes, dual_head, backbone_3d, backbone_3d_kwargs, ): super(Net3DSeg, self).__init__() # 3D network if backbone_3d == 'SCN': self.net_3d = UNetSCN(**backbone_3d_kwargs) else: raise NotImplementedError('3D backbone {} not supported'.format(backbone_3d)) # segmentation head self.linear = nn.Linear(self.net_3d.out_channels, num_classes) # 2nd segmentation head self.dual_head = dual_head if dual_head: self.linear2 = nn.Linear(self.net_3d.out_channels, num_classes) def forward(self, data_batch): feats = self.net_3d(data_batch['x']) x = self.linear(feats) preds = { 'feats': feats, 'seg_logit': x, } if self.dual_head: preds['seg_logit2'] = self.linear2(feats) return preds def test_Net2DSeg(): # 2D batch_size = 2 img_width = 400 img_height = 225 # 3D num_coords = 2000 num_classes = 11 # 2D img = torch.rand(batch_size, 3, img_height, img_width) u = torch.randint(high=img_height, size=(batch_size, num_coords // batch_size, 1)) v = torch.randint(high=img_width, size=(batch_size, num_coords // batch_size, 1)) img_indices = torch.cat([u, v], 2) # to cuda img = img.cuda() img_indices = img_indices.cuda() net_2d = Net2DSeg(num_classes, 
backbone_2d='UNetResNet34', backbone_2d_kwargs={}, dual_head=True) net_2d.cuda() out_dict = net_2d({ 'img': img, 'img_indices': img_indices, }) for k, v in out_dict.items(): print('Net2DSeg:', k, v.shape) def test_Net3DSeg(): in_channels = 1 num_coords = 2000 full_scale = 4096 num_seg_classes = 11 coords = torch.randint(high=full_scale, size=(num_coords, 3)) feats = torch.rand(num_coords, in_channels) feats = feats.cuda() net_3d = Net3DSeg(num_seg_classes, dual_head=True, backbone_3d='SCN', backbone_3d_kwargs={'in_channels': in_channels}) net_3d.cuda() out_dict = net_3d({ 'x': [coords, feats], }) for k, v in out_dict.items(): print('Net3DSeg:', k, v.shape) if __name__ == '__main__': test_Net2DSeg() test_Net3DSeg() ================================================ FILE: xmuda/test.py ================================================ #!/usr/bin/env python import os import os.path as osp import argparse import logging import time import socket import warnings import torch from xmuda.common.utils.checkpoint import CheckpointerV2 from xmuda.common.utils.logger import setup_logger from xmuda.common.utils.metric_logger import MetricLogger from xmuda.common.utils.torch_util import set_random_seed from xmuda.models.build import build_model_2d, build_model_3d from xmuda.data.build import build_dataloader from xmuda.data.utils.validate import validate def parse_args(): parser = argparse.ArgumentParser(description='xMUDA test') parser.add_argument( '--cfg', dest='config_file', default='', metavar='FILE', help='path to config file', type=str, ) parser.add_argument('ckpt2d', type=str, help='path to checkpoint file of the 2D model') parser.add_argument('ckpt3d', type=str, help='path to checkpoint file of the 3D model') parser.add_argument('--pselab', action='store_true', help='generate pseudo-labels') parser.add_argument( 'opts', help='Modify config options using the command-line', default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() return args def test(cfg, args, output_dir=''): logger = logging.getLogger('xmuda.test') # build 2d model model_2d = build_model_2d(cfg)[0] # build 3d model model_3d = build_model_3d(cfg)[0] model_2d = model_2d.cuda() model_3d = model_3d.cuda() # build checkpointer checkpointer_2d = CheckpointerV2(model_2d, save_dir=output_dir, logger=logger) if args.ckpt2d: # load weight if specified weight_path = args.ckpt2d.replace('@', output_dir) checkpointer_2d.load(weight_path, resume=False) else: # load last checkpoint checkpointer_2d.load(None, resume=True) checkpointer_3d = CheckpointerV2(model_3d, save_dir=output_dir, logger=logger) if args.ckpt3d: # load weight if specified weight_path = args.ckpt3d.replace('@', output_dir) checkpointer_3d.load(weight_path, resume=False) else: # load last checkpoint checkpointer_3d.load(None, resume=True) # build dataset test_dataloader = build_dataloader(cfg, mode='test', domain='target') pselab_path = None if args.pselab: pselab_dir = osp.join(output_dir, 'pselab_data') os.makedirs(pselab_dir, exist_ok=True) assert len(cfg.DATASET_TARGET.TEST) == 1 pselab_path = osp.join(pselab_dir, cfg.DATASET_TARGET.TEST[0] + '.npy') # ---------------------------------------------------------------------------- # # Test # ---------------------------------------------------------------------------- # set_random_seed(cfg.RNG_SEED) test_metric_logger = MetricLogger(delimiter=' ') model_2d.eval() model_3d.eval() validate(cfg, model_2d, model_3d, test_dataloader, test_metric_logger, pselab_path=pselab_path) def main(): args = parse_args() # load the 
configuration # import on-the-fly to avoid overwriting cfg from xmuda.common.config import purge_cfg from xmuda.config.xmuda import cfg cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) purge_cfg(cfg) cfg.freeze() output_dir = cfg.OUTPUT_DIR # replace '@' with config path if output_dir: config_path = osp.splitext(args.config_file)[0] output_dir = output_dir.replace('@', config_path.replace('configs/', '')) if not osp.isdir(output_dir): warnings.warn('Make a new directory: {}'.format(output_dir)) os.makedirs(output_dir) # run name timestamp = time.strftime('%m-%d_%H-%M-%S') hostname = socket.gethostname() run_name = '{:s}.{:s}'.format(timestamp, hostname) logger = setup_logger('xmuda', output_dir, comment='test.{:s}'.format(run_name)) logger.info('{:d} GPUs available'.format(torch.cuda.device_count())) logger.info(args) logger.info('Loaded configuration file {:s}'.format(args.config_file)) logger.info('Running with config:\n{}'.format(cfg)) assert cfg.MODEL_2D.DUAL_HEAD == cfg.MODEL_3D.DUAL_HEAD test(cfg, args, output_dir) if __name__ == '__main__': main() ================================================ FILE: xmuda/train_baseline.py ================================================ #!/usr/bin/env python import os import os.path as osp import argparse import logging import time import socket import warnings import torch import torch.nn.functional as F from torch.utils.tensorboard import SummaryWriter from xmuda.common.solver.build import build_optimizer, build_scheduler from xmuda.common.utils.checkpoint import CheckpointerV2 from xmuda.common.utils.logger import setup_logger from xmuda.common.utils.metric_logger import MetricLogger from xmuda.common.utils.torch_util import set_random_seed from xmuda.models.build import build_model_2d, build_model_3d from xmuda.data.build import build_dataloader from xmuda.data.utils.validate import validate def parse_args(): parser = argparse.ArgumentParser(description='xMUDA training') parser.add_argument( '--cfg', dest='config_file', default='', metavar='FILE', help='path to config file', type=str, ) parser.add_argument( 'opts', help='Modify config options using the command-line', default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() return args def init_metric_logger(metric_list): new_metric_list = [] for metric in metric_list: if isinstance(metric, (list, tuple)): new_metric_list.extend(metric) else: new_metric_list.append(metric) metric_logger = MetricLogger(delimiter=' ') metric_logger.add_meters(new_metric_list) return metric_logger def train(cfg, output_dir='', run_name=''): # ---------------------------------------------------------------------------- # # Build models, optimizer, scheduler, checkpointer, etc. 
# ---------------------------------------------------------------------------- # logger = logging.getLogger('xmuda.train') set_random_seed(cfg.RNG_SEED) # build 2d model model_2d, train_metric_2d = build_model_2d(cfg) logger.info('Build 2D model:\n{}'.format(str(model_2d))) num_params = sum(param.numel() for param in model_2d.parameters()) print('#Parameters: {:.2e}'.format(num_params)) # build 3d model model_3d, train_metric_3d = build_model_3d(cfg) logger.info('Build 3D model:\n{}'.format(str(model_3d))) num_params = sum(param.numel() for param in model_3d.parameters()) print('#Parameters: {:.2e}'.format(num_params)) model_2d = model_2d.cuda() model_3d = model_3d.cuda() # build optimizer optimizer_2d = build_optimizer(cfg, model_2d) optimizer_3d = build_optimizer(cfg, model_3d) # build lr scheduler scheduler_2d = build_scheduler(cfg, optimizer_2d) scheduler_3d = build_scheduler(cfg, optimizer_3d) # build checkpointer # Note that checkpointer will load state_dict of model, optimizer and scheduler. checkpointer_2d = CheckpointerV2(model_2d, optimizer=optimizer_2d, scheduler=scheduler_2d, save_dir=output_dir, logger=logger, postfix='_2d', max_to_keep=cfg.TRAIN.MAX_TO_KEEP) checkpoint_data_2d = checkpointer_2d.load(cfg.RESUME_PATH, resume=cfg.AUTO_RESUME, resume_states=cfg.RESUME_STATES) checkpointer_3d = CheckpointerV2(model_3d, optimizer=optimizer_3d, scheduler=scheduler_3d, save_dir=output_dir, logger=logger, postfix='_3d', max_to_keep=cfg.TRAIN.MAX_TO_KEEP) checkpoint_data_3d = checkpointer_3d.load(cfg.RESUME_PATH, resume=cfg.AUTO_RESUME, resume_states=cfg.RESUME_STATES) ckpt_period = cfg.TRAIN.CHECKPOINT_PERIOD # build tensorboard logger (optionally by comment) if output_dir: tb_dir = osp.join(output_dir, 'tb.{:s}'.format(run_name)) summary_writer = SummaryWriter(tb_dir) else: summary_writer = None # ---------------------------------------------------------------------------- # # Train # ---------------------------------------------------------------------------- # max_iteration = cfg.SCHEDULER.MAX_ITERATION start_iteration = checkpoint_data_2d.get('iteration', 0) # build data loader # Reset the random seed again in case the initialization of models changes the random state. 
set_random_seed(cfg.RNG_SEED) train_dataloader_src = build_dataloader(cfg, mode='train', domain='source', start_iteration=start_iteration) val_period = cfg.VAL.PERIOD val_dataloader = build_dataloader(cfg, mode='val', domain='target') if val_period > 0 else None best_metric_name = 'best_{}'.format(cfg.VAL.METRIC) best_metric = { '2d': checkpoint_data_2d.get(best_metric_name, None), '3d': checkpoint_data_3d.get(best_metric_name, None) } best_metric_iter = {'2d': -1, '3d': -1} logger.info('Start training from iteration {}'.format(start_iteration)) # add metrics train_metric_logger = init_metric_logger([train_metric_2d, train_metric_3d]) val_metric_logger = MetricLogger(delimiter=' ') def setup_train(): # set training mode model_2d.train() model_3d.train() # reset metric train_metric_logger.reset() def setup_validate(): # set evaluate mode model_2d.eval() model_3d.eval() # reset metric val_metric_logger.reset() if cfg.TRAIN.CLASS_WEIGHTS: class_weights = torch.tensor(cfg.TRAIN.CLASS_WEIGHTS).cuda() else: class_weights = None setup_train() end = time.time() train_iter_src = enumerate(train_dataloader_src) for iteration in range(start_iteration, max_iteration): # fetch data_batches for source & target _, data_batch_src = train_iter_src.__next__() data_time = time.time() - end # copy data from cpu to gpu if 'SCN' in cfg.DATASET_SOURCE.TYPE and 'SCN' in cfg.DATASET_TARGET.TYPE: # source data_batch_src['x'][1] = data_batch_src['x'][1].cuda() data_batch_src['seg_label'] = data_batch_src['seg_label'].cuda() data_batch_src['img'] = data_batch_src['img'].cuda() else: raise NotImplementedError('Only SCN is supported for now.') optimizer_2d.zero_grad() optimizer_3d.zero_grad() # ---------------------------------------------------------------------------- # # Train on source # ---------------------------------------------------------------------------- # preds_2d = model_2d(data_batch_src) preds_3d = model_3d(data_batch_src) # segmentation loss: cross entropy seg_loss_src_2d = F.cross_entropy(preds_2d['seg_logit'], data_batch_src['seg_label'], weight=class_weights) seg_loss_src_3d = F.cross_entropy(preds_3d['seg_logit'], data_batch_src['seg_label'], weight=class_weights) train_metric_logger.update(seg_loss_src_2d=seg_loss_src_2d, seg_loss_src_3d=seg_loss_src_3d) loss_2d = seg_loss_src_2d loss_3d = seg_loss_src_3d if cfg.TRAIN.XMUDA.lambda_xm_src > 0: # cross-modal loss: KL divergence seg_logit_2d = preds_2d['seg_logit2'] if cfg.MODEL_2D.DUAL_HEAD else preds_2d['seg_logit'] seg_logit_3d = preds_3d['seg_logit2'] if cfg.MODEL_3D.DUAL_HEAD else preds_3d['seg_logit'] xm_loss_src_2d = F.kl_div(F.log_softmax(seg_logit_2d, dim=1), F.softmax(preds_3d['seg_logit'].detach(), dim=1), reduction='none').sum(1).mean() xm_loss_src_3d = F.kl_div(F.log_softmax(seg_logit_3d, dim=1), F.softmax(preds_2d['seg_logit'].detach(), dim=1), reduction='none').sum(1).mean() train_metric_logger.update(xm_loss_src_2d=xm_loss_src_2d, xm_loss_src_3d=xm_loss_src_3d) loss_2d += cfg.TRAIN.XMUDA.lambda_xm_src * xm_loss_src_2d loss_3d += cfg.TRAIN.XMUDA.lambda_xm_src * xm_loss_src_3d # update metric (e.g. 
        # update metric (e.g. IoU)
        with torch.no_grad():
            train_metric_2d.update_dict(preds_2d, data_batch_src)
            train_metric_3d.update_dict(preds_3d, data_batch_src)

        # backward
        loss_2d.backward()
        loss_3d.backward()

        optimizer_2d.step()
        optimizer_3d.step()

        batch_time = time.time() - end
        train_metric_logger.update(time=batch_time, data=data_time)

        # log
        cur_iter = iteration + 1
        if cur_iter == 1 or (cfg.TRAIN.LOG_PERIOD > 0 and cur_iter % cfg.TRAIN.LOG_PERIOD == 0):
            logger.info(
                train_metric_logger.delimiter.join(
                    [
                        'iter: {iter:4d}',
                        '{meters}',
                        'lr: {lr:.2e}',
                        'max mem: {memory:.0f}',
                    ]
                ).format(
                    iter=cur_iter,
                    meters=str(train_metric_logger),
                    lr=optimizer_2d.param_groups[0]['lr'],
                    memory=torch.cuda.max_memory_allocated() / (1024.0 ** 2),
                )
            )

        # summary
        if summary_writer is not None and cfg.TRAIN.SUMMARY_PERIOD > 0 and cur_iter % cfg.TRAIN.SUMMARY_PERIOD == 0:
            keywords = ('loss', 'acc', 'iou')
            for name, meter in train_metric_logger.meters.items():
                if all(k not in name for k in keywords):
                    continue
                summary_writer.add_scalar('train/' + name, meter.avg, global_step=cur_iter)

        # checkpoint
        if (ckpt_period > 0 and cur_iter % ckpt_period == 0) or cur_iter == max_iteration:
            checkpoint_data_2d['iteration'] = cur_iter
            checkpoint_data_2d[best_metric_name] = best_metric['2d']
            checkpointer_2d.save('model_2d_{:06d}'.format(cur_iter), **checkpoint_data_2d)
            checkpoint_data_3d['iteration'] = cur_iter
            checkpoint_data_3d[best_metric_name] = best_metric['3d']
            checkpointer_3d.save('model_3d_{:06d}'.format(cur_iter), **checkpoint_data_3d)

        # ---------------------------------------------------------------------------- #
        # validate for one epoch
        # ---------------------------------------------------------------------------- #
        if val_period > 0 and (cur_iter % val_period == 0 or cur_iter == max_iteration):
            start_time_val = time.time()
            setup_validate()
            validate(cfg, model_2d, model_3d, val_dataloader, val_metric_logger)
            epoch_time_val = time.time() - start_time_val
            logger.info('Iteration[{}]-Val {}  total_time: {:.2f}s'.format(
                cur_iter, val_metric_logger.summary_str, epoch_time_val))

            # summary
            if summary_writer is not None:
                keywords = ('loss', 'acc', 'iou')
                for name, meter in val_metric_logger.meters.items():
                    if all(k not in name for k in keywords):
                        continue
                    summary_writer.add_scalar('val/' + name, meter.avg, global_step=cur_iter)

            # best validation
            for modality in ['2d', '3d']:
                cur_metric_name = cfg.VAL.METRIC + '_' + modality
                if cur_metric_name in val_metric_logger.meters:
                    cur_metric = val_metric_logger.meters[cur_metric_name].global_avg
                    if best_metric[modality] is None or best_metric[modality] < cur_metric:
                        best_metric[modality] = cur_metric
                        best_metric_iter[modality] = cur_iter

            # restore training
            setup_train()

        scheduler_2d.step()
        scheduler_3d.step()
        end = time.time()

    for modality in ['2d', '3d']:
        logger.info('Best val-{}-{} = {:.2f} at iteration {}'.format(modality.upper(),
                                                                     cfg.VAL.METRIC,
                                                                     best_metric[modality] * 100,
                                                                     best_metric_iter[modality]))


def main():
    args = parse_args()

    # load the configuration
    # import on-the-fly to avoid overwriting cfg
    from xmuda.common.config import purge_cfg
    from xmuda.config.xmuda import cfg
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    purge_cfg(cfg)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    # replace '@' with config path
    if output_dir:
        config_path = osp.splitext(args.config_file)[0]
        output_dir = output_dir.replace('@', config_path.replace('configs/', ''))
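        # For example (with a hypothetical OUTPUT_DIR of 'outputs/@'):
        # running with --cfg configs/nuscenes/usa_singapore/baseline.yaml
        # expands output_dir to 'outputs/nuscenes/usa_singapore/baseline',
        # i.e. the output tree mirrors the configs/ tree.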
        if osp.isdir(output_dir):
            warnings.warn('Output directory exists.')
        os.makedirs(output_dir, exist_ok=True)

    # run name
    timestamp = time.strftime('%m-%d_%H-%M-%S')
    hostname = socket.gethostname()
    run_name = '{:s}.{:s}'.format(timestamp, hostname)

    logger = setup_logger('xmuda', output_dir, comment='train.{:s}'.format(run_name))
    logger.info('{:d} GPUs available'.format(torch.cuda.device_count()))
    logger.info(args)

    logger.info('Loaded configuration file {:s}'.format(args.config_file))
    logger.info('Running with config:\n{}'.format(cfg))

    # check that 2D and 3D model use either both single head or both dual head
    assert cfg.MODEL_2D.DUAL_HEAD == cfg.MODEL_3D.DUAL_HEAD
    # check that there is no loss on the target set (the baseline is source-only)
    assert cfg.TRAIN.XMUDA.lambda_xm_trg == 0 and cfg.TRAIN.XMUDA.lambda_pl == 0
    train(cfg, output_dir, run_name)


if __name__ == '__main__':
    main()


================================================
FILE: xmuda/train_xmuda.py
================================================
#!/usr/bin/env python
import os
import os.path as osp
import argparse
import logging
import time
import socket
import warnings

import torch
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

from xmuda.common.solver.build import build_optimizer, build_scheduler
from xmuda.common.utils.checkpoint import CheckpointerV2
from xmuda.common.utils.logger import setup_logger
from xmuda.common.utils.metric_logger import MetricLogger
from xmuda.common.utils.torch_util import set_random_seed
from xmuda.models.build import build_model_2d, build_model_3d
from xmuda.data.build import build_dataloader
from xmuda.data.utils.validate import validate
from xmuda.models.losses import entropy_loss


def parse_args():
    parser = argparse.ArgumentParser(description='xMUDA training')
    parser.add_argument(
        '--cfg',
        dest='config_file',
        default='',
        metavar='FILE',
        help='path to config file',
        type=str,
    )
    parser.add_argument(
        'opts',
        help='Modify config options using the command-line',
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    return args
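# Note: the positional 'opts' argument (argparse.REMAINDER) is forwarded to
# cfg.merge_from_list() in main(), so any config value can be overridden from
# the command line, e.g. (illustrative values):
#   python xmuda/train_xmuda.py --cfg configs/nuscenes/usa_singapore/xmuda.yaml \
#       TRAIN.XMUDA.lambda_xm_trg 0.1 VAL.PERIOD 5000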
def init_metric_logger(metric_list):
    new_metric_list = []
    for metric in metric_list:
        if isinstance(metric, (list, tuple)):
            new_metric_list.extend(metric)
        else:
            new_metric_list.append(metric)
    metric_logger = MetricLogger(delimiter='  ')
    metric_logger.add_meters(new_metric_list)
    return metric_logger


def train(cfg, output_dir='', run_name=''):
    # ---------------------------------------------------------------------------- #
    # Build models, optimizer, scheduler, checkpointer, etc.
    # ---------------------------------------------------------------------------- #
    logger = logging.getLogger('xmuda.train')

    set_random_seed(cfg.RNG_SEED)

    # build 2d model
    model_2d, train_metric_2d = build_model_2d(cfg)
    logger.info('Build 2D model:\n{}'.format(str(model_2d)))
    num_params = sum(param.numel() for param in model_2d.parameters())
    print('#Parameters: {:.2e}'.format(num_params))

    # build 3d model
    model_3d, train_metric_3d = build_model_3d(cfg)
    logger.info('Build 3D model:\n{}'.format(str(model_3d)))
    num_params = sum(param.numel() for param in model_3d.parameters())
    print('#Parameters: {:.2e}'.format(num_params))

    model_2d = model_2d.cuda()
    model_3d = model_3d.cuda()

    # build optimizer
    optimizer_2d = build_optimizer(cfg, model_2d)
    optimizer_3d = build_optimizer(cfg, model_3d)

    # build lr scheduler
    scheduler_2d = build_scheduler(cfg, optimizer_2d)
    scheduler_3d = build_scheduler(cfg, optimizer_3d)

    # build checkpointer
    # Note that the checkpointer loads the state_dict of model, optimizer and scheduler.
    checkpointer_2d = CheckpointerV2(model_2d,
                                     optimizer=optimizer_2d,
                                     scheduler=scheduler_2d,
                                     save_dir=output_dir,
                                     logger=logger,
                                     postfix='_2d',
                                     max_to_keep=cfg.TRAIN.MAX_TO_KEEP)
    checkpoint_data_2d = checkpointer_2d.load(cfg.RESUME_PATH, resume=cfg.AUTO_RESUME, resume_states=cfg.RESUME_STATES)
    checkpointer_3d = CheckpointerV2(model_3d,
                                     optimizer=optimizer_3d,
                                     scheduler=scheduler_3d,
                                     save_dir=output_dir,
                                     logger=logger,
                                     postfix='_3d',
                                     max_to_keep=cfg.TRAIN.MAX_TO_KEEP)
    checkpoint_data_3d = checkpointer_3d.load(cfg.RESUME_PATH, resume=cfg.AUTO_RESUME, resume_states=cfg.RESUME_STATES)
    ckpt_period = cfg.TRAIN.CHECKPOINT_PERIOD

    # build tensorboard logger (optional)
    if output_dir:
        tb_dir = osp.join(output_dir, 'tb.{:s}'.format(run_name))
        summary_writer = SummaryWriter(tb_dir)
    else:
        summary_writer = None

    # ---------------------------------------------------------------------------- #
    # Train
    # ---------------------------------------------------------------------------- #
    max_iteration = cfg.SCHEDULER.MAX_ITERATION
    start_iteration = checkpoint_data_2d.get('iteration', 0)

    # build data loader
    # Reset the random seed again in case the initialization of models changes the random state.
    set_random_seed(cfg.RNG_SEED)
    train_dataloader_src = build_dataloader(cfg, mode='train', domain='source', start_iteration=start_iteration)
    train_dataloader_trg = build_dataloader(cfg, mode='train', domain='target', start_iteration=start_iteration)
    val_period = cfg.VAL.PERIOD
    val_dataloader = build_dataloader(cfg, mode='val', domain='target') if val_period > 0 else None

    best_metric_name = 'best_{}'.format(cfg.VAL.METRIC)
    best_metric = {
        '2d': checkpoint_data_2d.get(best_metric_name, None),
        '3d': checkpoint_data_3d.get(best_metric_name, None)
    }
    best_metric_iter = {'2d': -1, '3d': -1}
    logger.info('Start training from iteration {}'.format(start_iteration))

    # add metrics
    train_metric_logger = init_metric_logger([train_metric_2d, train_metric_3d])
    val_metric_logger = MetricLogger(delimiter='  ')

    def setup_train():
        # set training mode
        model_2d.train()
        model_3d.train()
        # reset metric
        train_metric_logger.reset()

    def setup_validate():
        # set evaluate mode
        model_2d.eval()
        model_3d.eval()
        # reset metric
        val_metric_logger.reset()

    if cfg.TRAIN.CLASS_WEIGHTS:
        class_weights = torch.tensor(cfg.TRAIN.CLASS_WEIGHTS).cuda()
    else:
        class_weights = None

    setup_train()
    end = time.time()
    train_iter_src = enumerate(train_dataloader_src)
    train_iter_trg = enumerate(train_dataloader_trg)
    for iteration in range(start_iteration, max_iteration):
        # fetch data_batches for source & target
        _, data_batch_src = next(train_iter_src)
        _, data_batch_trg = next(train_iter_trg)
        data_time = time.time() - end
        # copy data from cpu to gpu
        if 'SCN' in cfg.DATASET_SOURCE.TYPE and 'SCN' in cfg.DATASET_TARGET.TYPE:
            # source
            data_batch_src['x'][1] = data_batch_src['x'][1].cuda()
            data_batch_src['seg_label'] = data_batch_src['seg_label'].cuda()
            data_batch_src['img'] = data_batch_src['img'].cuda()
            # target
            data_batch_trg['x'][1] = data_batch_trg['x'][1].cuda()
            data_batch_trg['seg_label'] = data_batch_trg['seg_label'].cuda()
            data_batch_trg['img'] = data_batch_trg['img'].cuda()
            if cfg.TRAIN.XMUDA.lambda_pl > 0:
                data_batch_trg['pseudo_label_2d'] = data_batch_trg['pseudo_label_2d'].cuda()
                data_batch_trg['pseudo_label_3d'] = data_batch_trg['pseudo_label_3d'].cuda()
        else:
            raise NotImplementedError('Only SCN is supported for now.')

        optimizer_2d.zero_grad()
        optimizer_3d.zero_grad()
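        # Note: gradients are zeroed once per iteration, but .backward() is
        # called twice below (first on the source batch, then on the target
        # batch), so the gradients of both domains accumulate before the single
        # optimizer step per network at the end of the iteration.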
        # ---------------------------------------------------------------------------- #
        # Train on source
        # ---------------------------------------------------------------------------- #
        preds_2d = model_2d(data_batch_src)
        preds_3d = model_3d(data_batch_src)

        # segmentation loss: cross entropy
        seg_loss_src_2d = F.cross_entropy(preds_2d['seg_logit'], data_batch_src['seg_label'], weight=class_weights)
        seg_loss_src_3d = F.cross_entropy(preds_3d['seg_logit'], data_batch_src['seg_label'], weight=class_weights)
        train_metric_logger.update(seg_loss_src_2d=seg_loss_src_2d, seg_loss_src_3d=seg_loss_src_3d)
        loss_2d = seg_loss_src_2d
        loss_3d = seg_loss_src_3d

        if cfg.TRAIN.XMUDA.lambda_xm_src > 0:
            # cross-modal loss: KL divergence
            seg_logit_2d = preds_2d['seg_logit2'] if cfg.MODEL_2D.DUAL_HEAD else preds_2d['seg_logit']
            seg_logit_3d = preds_3d['seg_logit2'] if cfg.MODEL_3D.DUAL_HEAD else preds_3d['seg_logit']
            xm_loss_src_2d = F.kl_div(F.log_softmax(seg_logit_2d, dim=1),
                                      F.softmax(preds_3d['seg_logit'].detach(), dim=1),
                                      reduction='none').sum(1).mean()
            xm_loss_src_3d = F.kl_div(F.log_softmax(seg_logit_3d, dim=1),
                                      F.softmax(preds_2d['seg_logit'].detach(), dim=1),
                                      reduction='none').sum(1).mean()
            train_metric_logger.update(xm_loss_src_2d=xm_loss_src_2d, xm_loss_src_3d=xm_loss_src_3d)
            loss_2d += cfg.TRAIN.XMUDA.lambda_xm_src * xm_loss_src_2d
            loss_3d += cfg.TRAIN.XMUDA.lambda_xm_src * xm_loss_src_3d

        # update metric (e.g. IoU)
        with torch.no_grad():
            train_metric_2d.update_dict(preds_2d, data_batch_src)
            train_metric_3d.update_dict(preds_3d, data_batch_src)

        # backward
        loss_2d.backward()
        loss_3d.backward()

        # ---------------------------------------------------------------------------- #
        # Train on target
        # ---------------------------------------------------------------------------- #
        preds_2d = model_2d(data_batch_trg)
        preds_3d = model_3d(data_batch_trg)

        loss_2d = []
        loss_3d = []
        if cfg.TRAIN.XMUDA.lambda_xm_trg > 0:
            # cross-modal loss: KL divergence
            seg_logit_2d = preds_2d['seg_logit2'] if cfg.MODEL_2D.DUAL_HEAD else preds_2d['seg_logit']
            seg_logit_3d = preds_3d['seg_logit2'] if cfg.MODEL_3D.DUAL_HEAD else preds_3d['seg_logit']
            xm_loss_trg_2d = F.kl_div(F.log_softmax(seg_logit_2d, dim=1),
                                      F.softmax(preds_3d['seg_logit'].detach(), dim=1),
                                      reduction='none').sum(1).mean()
            xm_loss_trg_3d = F.kl_div(F.log_softmax(seg_logit_3d, dim=1),
                                      F.softmax(preds_2d['seg_logit'].detach(), dim=1),
                                      reduction='none').sum(1).mean()
            train_metric_logger.update(xm_loss_trg_2d=xm_loss_trg_2d, xm_loss_trg_3d=xm_loss_trg_3d)
            loss_2d.append(cfg.TRAIN.XMUDA.lambda_xm_trg * xm_loss_trg_2d)
            loss_3d.append(cfg.TRAIN.XMUDA.lambda_xm_trg * xm_loss_trg_3d)
        if cfg.TRAIN.XMUDA.lambda_pl > 0:
            # uni-modal self-training loss with pseudo labels
            pl_loss_trg_2d = F.cross_entropy(preds_2d['seg_logit'], data_batch_trg['pseudo_label_2d'])
            pl_loss_trg_3d = F.cross_entropy(preds_3d['seg_logit'], data_batch_trg['pseudo_label_3d'])
            train_metric_logger.update(pl_loss_trg_2d=pl_loss_trg_2d, pl_loss_trg_3d=pl_loss_trg_3d)
            loss_2d.append(cfg.TRAIN.XMUDA.lambda_pl * pl_loss_trg_2d)
            loss_3d.append(cfg.TRAIN.XMUDA.lambda_pl * pl_loss_trg_3d)
        if cfg.TRAIN.XMUDA.lambda_minent > 0:
            # MinEnt
            minent_loss_trg_2d = entropy_loss(F.softmax(preds_2d['seg_logit'], dim=1))
            minent_loss_trg_3d = entropy_loss(F.softmax(preds_3d['seg_logit'], dim=1))
            train_metric_logger.update(minent_loss_trg_2d=minent_loss_trg_2d,
                                       minent_loss_trg_3d=minent_loss_trg_3d)
            loss_2d.append(cfg.TRAIN.XMUDA.lambda_minent * minent_loss_trg_2d)
            loss_3d.append(cfg.TRAIN.XMUDA.lambda_minent * minent_loss_trg_3d)

        sum(loss_2d).backward()
        sum(loss_3d).backward()

        optimizer_2d.step()
        optimizer_3d.step()
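        # Note: the target-side losses are collected in Python lists so that any
        # subset of the three objectives (cross-modal KL, pseudo-label
        # self-training, entropy minimization) can be enabled through the
        # cfg.TRAIN.XMUDA.lambda_* weights, and sum(...) folds them into a
        # single scalar per network. At least one of the three must be positive:
        # otherwise sum([]) evaluates to the integer 0 and the .backward() calls
        # above would fail.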
        batch_time = time.time() - end
        train_metric_logger.update(time=batch_time, data=data_time)

        # log
        cur_iter = iteration + 1
        if cur_iter == 1 or (cfg.TRAIN.LOG_PERIOD > 0 and cur_iter % cfg.TRAIN.LOG_PERIOD == 0):
            logger.info(
                train_metric_logger.delimiter.join(
                    [
                        'iter: {iter:4d}',
                        '{meters}',
                        'lr: {lr:.2e}',
                        'max mem: {memory:.0f}',
                    ]
                ).format(
                    iter=cur_iter,
                    meters=str(train_metric_logger),
                    lr=optimizer_2d.param_groups[0]['lr'],
                    memory=torch.cuda.max_memory_allocated() / (1024.0 ** 2),
                )
            )

        # summary
        if summary_writer is not None and cfg.TRAIN.SUMMARY_PERIOD > 0 and cur_iter % cfg.TRAIN.SUMMARY_PERIOD == 0:
            keywords = ('loss', 'acc', 'iou')
            for name, meter in train_metric_logger.meters.items():
                if all(k not in name for k in keywords):
                    continue
                summary_writer.add_scalar('train/' + name, meter.avg, global_step=cur_iter)

        # checkpoint
        if (ckpt_period > 0 and cur_iter % ckpt_period == 0) or cur_iter == max_iteration:
            checkpoint_data_2d['iteration'] = cur_iter
            checkpoint_data_2d[best_metric_name] = best_metric['2d']
            checkpointer_2d.save('model_2d_{:06d}'.format(cur_iter), **checkpoint_data_2d)
            checkpoint_data_3d['iteration'] = cur_iter
            checkpoint_data_3d[best_metric_name] = best_metric['3d']
            checkpointer_3d.save('model_3d_{:06d}'.format(cur_iter), **checkpoint_data_3d)

        # ---------------------------------------------------------------------------- #
        # validate for one epoch
        # ---------------------------------------------------------------------------- #
        if val_period > 0 and (cur_iter % val_period == 0 or cur_iter == max_iteration):
            start_time_val = time.time()
            setup_validate()
            validate(cfg, model_2d, model_3d, val_dataloader, val_metric_logger)
            epoch_time_val = time.time() - start_time_val
            logger.info('Iteration[{}]-Val {}  total_time: {:.2f}s'.format(
                cur_iter, val_metric_logger.summary_str, epoch_time_val))

            # summary
            if summary_writer is not None:
                keywords = ('loss', 'acc', 'iou')
                for name, meter in val_metric_logger.meters.items():
                    if all(k not in name for k in keywords):
                        continue
                    summary_writer.add_scalar('val/' + name, meter.avg, global_step=cur_iter)

            # best validation
            for modality in ['2d', '3d']:
                cur_metric_name = cfg.VAL.METRIC + '_' + modality
                if cur_metric_name in val_metric_logger.meters:
                    cur_metric = val_metric_logger.meters[cur_metric_name].global_avg
                    if best_metric[modality] is None or best_metric[modality] < cur_metric:
                        best_metric[modality] = cur_metric
                        best_metric_iter[modality] = cur_iter

            # restore training
            setup_train()

        scheduler_2d.step()
        scheduler_3d.step()
        end = time.time()

    for modality in ['2d', '3d']:
        logger.info('Best val-{}-{} = {:.2f} at iteration {}'.format(modality.upper(),
                                                                     cfg.VAL.METRIC,
                                                                     best_metric[modality] * 100,
                                                                     best_metric_iter[modality]))


def main():
    args = parse_args()

    # load the configuration
    # import on-the-fly to avoid overwriting cfg
    from xmuda.common.config import purge_cfg
    from xmuda.config.xmuda import cfg
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    purge_cfg(cfg)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    # replace '@' with config path
    if output_dir:
        config_path = osp.splitext(args.config_file)[0]
        output_dir = output_dir.replace('@', config_path.replace('configs/', ''))
        if osp.isdir(output_dir):
            warnings.warn('Output directory exists.')
        os.makedirs(output_dir, exist_ok=True)

    # run name
    timestamp = time.strftime('%m-%d_%H-%M-%S')
    hostname = socket.gethostname()
    run_name = '{:s}.{:s}'.format(timestamp, hostname)

    logger = setup_logger('xmuda', output_dir, comment='train.{:s}'.format(run_name))
    logger.info('{:d} GPUs available'.format(torch.cuda.device_count()))
    logger.info(args)

    logger.info('Loaded configuration file {:s}'.format(args.config_file))
    logger.info('Running with config:\n{}'.format(cfg))

    # check that 2D and 3D model use either both single head or both dual head
    assert cfg.MODEL_2D.DUAL_HEAD == cfg.MODEL_3D.DUAL_HEAD
    # check that at least one xMUDA loss is enabled
    assert cfg.TRAIN.XMUDA.lambda_xm_src > 0 or cfg.TRAIN.XMUDA.lambda_xm_trg > 0 or cfg.TRAIN.XMUDA.lambda_pl > 0 or \
        cfg.TRAIN.XMUDA.lambda_minent > 0
    train(cfg, output_dir, run_name)


if __name__ == '__main__':
    main()
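# Example invocation (config paths as in this repository's configs/ tree; the
# output location is controlled by OUTPUT_DIR in the yaml):
#   python xmuda/train_xmuda.py --cfg configs/nuscenes/usa_singapore/xmuda.yaml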